{"$schema": "https://c3voc.de/schedule/schema.json", "generator": {"name": "pretalx", "version": "2026.1.1"}, "schedule": {"url": "https://pretalx.com/pyconde-pydata-2026/schedule/", "version": "0.25", "base_url": "https://pretalx.com", "conference": {"acronym": "pyconde-pydata-2026", "title": "PyCon DE & PyData 2026", "start": "2026-04-14", "end": "2026-04-16", "daysCount": 3, "timeslot_duration": "00:05", "time_zone_name": "Europe/Berlin", "colors": {"primary": "#3778be"}, "rooms": [{"name": "Merck Plenary (Spectrum) [1st Floor]", "slug": "4989-merck-plenary-spectrum-1st-floor", "guid": "530e17cb-41ad-5ee2-be6d-395e5f131bf2", "description": "Ground floor", "capacity": 1072}, {"name": "Titanium [2nd Floor]", "slug": "4990-titanium-2nd-floor", "guid": "cdc51427-366f-58ab-a16b-720f5c46bea5", "description": "2.0234, 2nd floor", "capacity": 300}, {"name": "Helium [3rd Floor]", "slug": "4991-helium-3rd-floor", "guid": "3d7f383d-5261-53c6-9fc9-4b08d199be60", "description": "3.0789, 3rd floor", "capacity": 284}, {"name": "Platinum [2nd Floor]", "slug": "4992-platinum-2nd-floor", "guid": "89a9ebe7-abc4-5d5f-add1-c944eef3fd1e", "description": "2.0678, 2nd floor", "capacity": 263}, {"name": "Europium [3rd Floor]", "slug": "4993-europium-3rd-floor", "guid": "cf93aeb4-228c-52cb-9490-9f6d22695ff5", "description": "3.034, 3rd floor", "capacity": 161}, {"name": "Palladium [2nd Floor]", "slug": "4995-palladium-2nd-floor", "guid": "e920db12-c50a-54a3-abfb-a70ee4d618ef", "description": "2.05, 2nd floor", "capacity": 63}, {"name": "Ferrum [2nd Floor]", "slug": "4996-ferrum-2nd-floor", "guid": "4900ae9b-ef4f-5c1b-ae4c-78d0895ede32", "description": "2nd Floor", "capacity": 269}, {"name": "Dynamicum [Ground Floor]", "slug": "4997-dynamicum-ground-floor", "guid": "ea9b42b4-5478-50fc-875b-6c0d962ace50", "description": "0.04, ground floor", "capacity": 150}, {"name": "Lounge [1st Floor]", "slug": "4999-lounge-1st-floor", "guid": "c102f4af-af00-5160-88f9-9a47d1d3af6f", "description": 
"3.11, 3rd floor, foyer", "capacity": 80}], "tracks": [{"name": "PyCon: MLOps & DevOps", "slug": "6482-pycon-mlops-devops", "color": "#000000"}, {"name": "PyCon: Programming & Software Engineering & Testing", "slug": "6483-pycon-programming-software-engineering-testing", "color": "#000000"}, {"name": "PyCon: Python Language & Ecosystem", "slug": "6484-pycon-python-language-ecosystem", "color": "#000000"}, {"name": "PyCon: Security", "slug": "6485-pycon-security", "color": "#000000"}, {"name": "PyCon: Django & Web", "slug": "6487-pycon-django-web", "color": "#000000"}, {"name": "PyCon: Embedded Systems & Robotics", "slug": "6493-pycon-embedded-systems-robotics", "color": "#000000"}, {"name": "PyData: Data Handling & Data Engineering", "slug": "6488-pydata-data-handling-data-engineering", "color": "#000000"}, {"name": "PyData: Machine Learning & Deep Learning & Statistics", "slug": "6489-pydata-machine-learning-deep-learning-statistics", "color": "#000000"}, {"name": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "slug": "6490-pydata-natural-language-processing-audio-incl-generative-ai-nlp", "color": "#000000"}, {"name": "PyData: Computer Vision (incl. 
Generative AI CV)", "slug": "6491-pydata-computer-vision-incl-generative-ai-cv", "color": "#000000"}, {"name": "PyData: Generative AI & Synthetic Data", "slug": "6492-pydata-generative-ai-synthetic-data", "color": "#000000"}, {"name": "PyData: PyData & Scientific Libraries Stack", "slug": "6494-pydata-pydata-scientific-libraries-stack", "color": "#000000"}, {"name": "PyData: Visualisation & Notebooks", "slug": "6495-pydata-visualisation-notebooks", "color": "#000000"}, {"name": "General: Autonomous Systems & AI Agents", "slug": "6496-general-autonomous-systems-ai-agents", "color": "#000000"}, {"name": "General: Community & Diversity", "slug": "6497-general-community-diversity", "color": "#000000"}, {"name": "General: Education, Career & Life", "slug": "6498-general-education-career-life", "color": "#000000"}, {"name": "General: Ethics & Privacy", "slug": "6499-general-ethics-privacy", "color": "#000000"}, {"name": "General: Infrastructure - Hardware & Cloud", "slug": "6500-general-infrastructure-hardware-cloud", "color": "#000000"}, {"name": "General: Rust", "slug": "6504-general-rust", "color": "#000000"}, {"name": "General: Others", "slug": "6501-general-others", "color": "#000000"}, {"name": "Sponsor", "slug": "6502-sponsor", "color": "#000000"}, {"name": "Keynote", "slug": "6503-keynote", "color": "#000000"}, {"name": "Invited", "slug": "6984-invited", "color": "#000000"}], "days": [{"index": 1, "date": "2026-04-14", "day_start": "2026-04-14T04:00:00+02:00", "day_end": "2026-04-15T03:59:00+02:00", "rooms": {"Merck Plenary (Spectrum) [1st Floor]": [{"guid": "cfc5d255-f36a-5bbd-a73c-8ae3f9a722a0", "code": "BQPEUG", "id": 95764, "logo": null, "date": "2026-04-14T10:00:00+02:00", "start": "10:00", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95764-opening-session", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BQPEUG/", "title": "Opening Session", "subtitle": "", "track": null, "type": "Plenary", 
"language": "en", "abstract": "Opening Session", "description": "Opening Session", "recording_license": "", "do_not_record": false, "persons": [], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BQPEUG/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BQPEUG/", "attachments": []}, {"guid": "790b8b5d-cc75-57de-8624-c3542a7f9bc5", "code": "BFL7MQ", "id": 89471, "logo": null, "date": "2026-04-14T10:30:00+02:00", "start": "10:30", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-89471-from-scratch-to-scale-turning-llm-code-into-architecture-insights", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BFL7MQ/", "title": "From Scratch to Scale: Turning LLM Code into Architecture Insights", "subtitle": "", "track": "Keynote", "type": "Keynote", "language": "en", "abstract": "Python has been at the center of my work in machine learning and AI for more than a decade. It is where I start from scratch, experiment with ideas, and build systems that help me understand how large language models really work.\r\n\r\nIn this keynote, we will explore how Python enables this entire journey, from defining model architectures and training loops to scaling data and computation across devices. I will also reflect on how Python continues to support both the large models of today and the evolving systems of tomorrow, even as new backends take over the heavy lifting.", "description": "Python has been at the center of my work in machine learning and AI for more than a decade. It is where I start from scratch, experiment with ideas, and build systems that help me understand how large language models really work.\r\n\r\nIn this keynote, I will look at what it means to build and study LLMs in Python today. 
Starting from small, from-scratch implementations, I will show how Python and PyTorch help us understand modern model architectures, compare new designs against reference code, and learn details that papers often leave out. I will then connect those implementation lessons to current LLM trends, especially the push to reduce inference costs and KV-cache pressure as reasoning models and agentic workflows need longer contexts. At the end, I will also share a practical roadmap of libraries, open projects, and learning resources for going from first principles to real-world LLM development.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GVDHSU", "name": "Sebastian Raschka", "avatar": "https://pretalx.com/media/avatars/GVDHSU_gSTrbDo.webp", "biography": "Sebastian is an LLM Research Engineer with over a decade of experience in artificial intelligence. His work bridges academia and industry, including roles as a senior engineer at Lightning AI and a statistics professor at the University of Wisconsin\u2013Madison.\r\n\r\nHe is also the author of Build a Large Language Model (From Scratch).\r\n\r\nHis expertise lies in LLM research and the development of high-performance AI systems, with a strong focus on practical, code-driven implementations.", "public_name": "Sebastian Raschka", "guid": "eabcc832-a338-57f3-9df5-14e9e98ac951", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GVDHSU/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BFL7MQ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BFL7MQ/", "attachments": []}, {"guid": "bcc48b82-ac6e-5216-bda2-8b4fb124b523", "code": "88TTRY", "id": 86975, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-86975-sentinel-values-in-python-semantics-double-dispatch-and-the-limits-of-typing", "url": 
"https://pretalx.com/pyconde-pydata-2026/talk/88TTRY/", "title": "Sentinel Values in Python: Semantics, Double Dispatch, and the Limits of Typing", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Python relies heavily on special values such as `None`, `NotImplemented`, `Ellipsis`, and `dataclasses.MISSING`. These values are not incidental: they encode language semantics, enable control flow between objects, and shape API design.\r\n\r\nThis talk examines sentinel values as a first-class concept in Python. We will look at why None is often the wrong representation for absence, how NotImplemented enables double dispatch in rich comparisons, and where sentinel values appear throughout the standard library.\r\n\r\nA central focus is typing. While sentinel values are ubiquitous at runtime, Python currently has no standardized way to express them precisely in type hints. We will examine why Optional, overloads, and Literal fall short, what limited narrowing is possible today, and why creating a \u201creal\u201d custom sentinel with reliable type narrowing is still unsolved.\r\n\r\nFinally, we will discuss [PEP 661](https://peps.python.org/pep-0661/), i.e., the deferred proposal to standardize sentinel values and their typing semantics, and what its deferral means in practice. Using real-world examples, including Pydantic\u2019s experimental missing concept, this talk provides a clear mental model for sentinel values and realistic guidance for using them in typed Python codebases today.", "description": "Sentinel values are a fundamental but under-documented part of Python\u2019s design. They are used to represent absence, unsupported operations, incomplete state, and to coordinate control flow between objects. 
Yet, they are often treated as ad-hoc implementation details.\r\n\r\nThis talk starts by clarifying what sentinel values are and why None is frequently semantically overloaded and incorrect for modelling \u201cmissing\u201d or \u201cunset\u201d values. We then examine built-in sentinels such as `NotImplemented`, `Ellipsis`, and `dataclasses.MISSING`, with a detailed look at how `NotImplemented` enables double dispatch in equality and ordering operations.\r\n\r\nThe second half of the talk focuses on typing, where sentinel values expose fundamental tensions between Python\u2019s dynamic semantics and static type systems. We will discuss:\r\n\r\n* why Optional[T] does not mean \u201cunset\u201d\r\n* why Literal appears attractive for sentinels but rarely works in practice\r\n* what limited type narrowing is possible today and under which assumptions\r\n* why a fully reliable, user-defined sentinel with correct narrowing is currently not achievable in a portable way\r\n\r\nTo ground this in practice, we will look at real-world patterns used in production code, including Pydantic\u2019s experimental missing concept, and explain the trade-offs these designs make.\r\n\r\nFinally, we will examine [PEP 661](https://peps.python.org/pep-0661/), the proposal to standardize sentinel values and their typing semantics. 
We will explain what it would solve, why it was deferred, and what that deferral means for library and API authors today.\r\n\r\nThe talk concludes with concrete, honest guidelines: when sentinel values are the right tool, how to design APIs around them, and how to communicate absence clearly in typed Python code without pretending the type system can do more than it currently can.", "recording_license": "", "do_not_record": false, "persons": [{"code": "8LQU9C", "name": "Florian Wilhelm", "avatar": "https://pretalx.com/media/avatars/8LQU9C_cjSxM8b.webp", "biography": "Florian is Head of Data Science & Mathematical Modeling at inovex GmbH, an IT project center driven by innovation and quality, focusing its services on \u2018Digital Transformation\u2019. He holds a PhD in mathematics, has more than 10 years of experience in predictive & prescriptive analytics use-cases and likes everything math \ud83e\udd2f", "public_name": "Florian Wilhelm", "guid": "d8a2dd67-d397-54f5-88e9-b2c680fb4e5c", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8LQU9C/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/88TTRY/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/88TTRY/", "attachments": [{"title": "Slides of the talk", "url": "/media/pyconde-pydata-2026/submissions/88TTRY/resources/Sentin_OrD1KNG.pdf", "type": "related"}]}, {"guid": "d7f1a483-0ac0-510a-917d-6cb76ef099dc", "code": "KLN78E", "id": 94814, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-94814-the-foundation-model-revolution-for-tabular-data", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KLN78E/", "title": "The foundation model revolution for tabular data", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Sponsored Talk", "language": "en", "abstract": "Tabular data, spreadsheets organized in 
rows and columns, are ubiquitous across healthcare, business and finance. The fundamental prediction task of filling in missing values of a label column based on the rest of the columns is essential for thousands of use cases of high societal and commercial value. While gradient-boosted decision trees have dominated tabular data for the past 20\u2009years, we demonstrate that this is rapidly changing, with the foundation model revolution having arrived at tabular data. We will show the methods behind this and their extensions to causality, interpretability and robustness, and demo various agentic extensions.", "description": "Tabular data, spreadsheets organized in rows and columns, are ubiquitous across healthcare, business and finance. The fundamental prediction task of filling in missing values of a label column based on the rest of the columns is essential for thousands of use cases of high societal and commercial value. While gradient-boosted decision trees have dominated tabular data for the past 20\u2009years, we demonstrate that this is rapidly changing, with the foundation model revolution having arrived at tabular data. We will show the methods behind this and their extensions to causality, interpretability and robustness, and demo various agentic extensions.", "recording_license": "", "do_not_record": false, "persons": [{"code": "X9K7V8", "name": "Frank Hutter", "avatar": "https://pretalx.com/media/avatars/79EEXA_tMlkYAP.webp", "biography": "Co-Founder & CEO of Prior Labs.                         
\r\nProfessor, Tabular Foundation Models and AutoML.\r\nWe\u2018re hiring: [PriorLabs.ai/careers](https://priorlabs.ai/careers)", "public_name": "Frank Hutter", "guid": "77892c7e-dbbc-520e-b54b-d477aa894431", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/X9K7V8/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KLN78E/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KLN78E/", "attachments": [{"title": "PresentationSlides", "url": "/media/pyconde-pydata-2026/submissions/KLN78E/resources/TFM-ta_IDb8P1a.pdf", "type": "related"}]}, {"guid": "56e401e2-3026-5f9b-bd7f-4f827a236c74", "code": "9CYWET", "id": 95765, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "01:15", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95765-stop-waiting-start-shipping-real-world-strategy-for-open-source-llms", "url": "https://pretalx.com/pyconde-pydata-2026/talk/9CYWET/", "title": "Stop Waiting, Start Shipping: Real-World Strategy for Open-Source LLMs", "subtitle": "", "track": null, "type": "Panel", "language": "en", "abstract": "Chinese and American open-source LLMs are competing head-to-head \u2014 from DeepSeek and Qwen to Llama and Mistral. The model landscape is broader than ever, yet in Germany the debate still circles around waiting for the next breakthrough. Alexander Hendorf and Sebastian Raschka discuss what these models can and cannot do today, what biases to watch for, and which deployment strategies actually work in practice. 
The session reserves substantial time for questions and discussion with the audience.", "description": "Alexander Hendorf and Sebastian Raschka sit down for a fireside chat on the current state of open-source LLMs.\r\n\r\nWith Chinese models like DeepSeek and Qwen competing directly with Llama and Mistral, the choice of capable open-source models has never been wider \u2014 so why are so many teams still waiting for the next generation instead of building with what is already here?\r\n\r\nQuestions we want to discuss:\r\n\r\n- What role do Chinese and American OSS models play in the current competitive landscape?\r\n- Where do open-source models still fall short of proprietary ones, and where has the gap closed?\r\n- What biases should practitioners be aware of and how to handle them?\r\n- Are AI agents a fundamental shift or are we seeing diminishing returns?\r\n- What deployment strategies actually work \u2014 especially for European teams that rely on talent and domain expertise rather than hyperscaler compute?\r\n\r\nHalf the session is reserved for audience questions.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GVDHSU", "name": "Sebastian Raschka", "avatar": "https://pretalx.com/media/avatars/GVDHSU_gSTrbDo.webp", "biography": "Sebastian is an LLM Research Engineer with over a decade of experience in artificial intelligence. 
His work bridges academia and industry, including roles as a senior engineer at Lightning AI and a statistics professor at the University of Wisconsin\u2013Madison.\r\n\r\nHe is also the author of Build a Large Language Model (From Scratch).\r\n\r\nHis expertise lies in LLM research and the development of high-performance AI systems, with a strong focus on practical, code-driven implementations.", "public_name": "Sebastian Raschka", "guid": "eabcc832-a338-57f3-9df5-14e9e98ac951", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GVDHSU/"}, {"code": "K9DZKF", "name": "Alexander CS Hendorf", "avatar": "https://pretalx.com/media/avatars/8F38DV_FIGQ7yh.webp", "biography": "Alexander C.S. Hendorf is an independent AI and open-source strategy advisor working with companies in regulated industries. With 20+ years of hands-on experience across 50+ technologies \u2014 from the Python ecosystem to vector databases \u2014 he bridges the gap between boardroom decisions and technical execution. 
Alexander is a Python Software Foundation Fellow, heads the Open Source Working Group of the KI Bundesverband, serves on the board of the Python Software Verband, and has delivered 100+ talks in 15+ countries.", "public_name": "Alexander CS Hendorf", "guid": "e61ae96e-6f0d-5312-867d-6bf04eefb64f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/K9DZKF/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/9CYWET/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/9CYWET/", "attachments": []}, {"guid": "4cb82b9e-897d-5fdd-9d43-a54540f9a520", "code": "WAJQR7", "id": 94594, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "01:00", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-94594-panel-evolution-revolution-or-illusion-the-future-of-python-and-coding-in-the-age-of-ai", "url": "https://pretalx.com/pyconde-pydata-2026/talk/WAJQR7/", "title": "Panel: Evolution, Revolution, or Illusion? The Future of Python and Coding in the Age of AI", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Panel", "language": "en", "abstract": "Software engineering is changing fast. With AI now writing and reasoning about code, does it still make sense to learn Python or any language at all?\r\n\r\nIs this the evolution of our craft, a true revolution, or just hype from those who benefit most? Join us to debate the future of Python, the risks of AI-driven development, and what skills will actually matter next.", "description": "Software engineering is at a crossroads. With AI systems now capable of generating, debugging, and even reasoning about code, the very definition of programming is being challenged.\r\nDoes it still make sense to invest years learning Python, or any programming language, if machines can translate natural language specifications into working software? 
Are we witnessing the evolution of coding into a higher-level craft, the revolution of the software industry, or merely an illusion fueled by hype from those who benefit most?\r\nThis panel moderated by Sebastian Neubauer will confront these questions head-on. We will debate whether programming languages remain essential, whether software engineers are at risk of obsolescence, or whether the demand for engineers may actually explode in ways we cannot yet imagine. We will also explore the risks of over-reliance on AI, including potential security vulnerabilities, fragile or unexplainable systems, and the loss of deep understanding of the software we build.\r\nCome prepared for uncomfortable questions, bold predictions, and no easy answers. This is a session designed to challenge assumptions, spark debate, and imagine the possible futures of Python and software engineering in an AI-assisted world.\r\n\r\nNote: Join our interactive workshop to explore the future of Python and AI-assisted coding on Wednesday . Everyone is welcome to share ideas, debate risks, the future of Python and help shape what software engineering could look like in the age of AI.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NEBKEY", "name": "Sebastian Neubauer", "avatar": "https://pretalx.com/media/avatars/NEBKEY_9ioAcAl.webp", "biography": "Data scientist forever; Worked everywhere in Blue Yonder, messed with data science, built platforms, now exploring GenAI & AI agents. Known to always ask the question nobody else dared.", "public_name": "Sebastian Neubauer", "guid": "461acdf1-0d58-52ef-a400-0a008b4a35eb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NEBKEY/"}, {"code": "GSRLAK", "name": "Markus Klein", "avatar": "https://pretalx.com/media/avatars/YLBAMT_FOcXdr9.webp", "biography": "Markus Klein is a Founding Engineer at Supermetal. He maintains open-source projects including the odbc2parquet command-line tool and the arrow-odbc Python wheels. 
Throughout his career, in both management and individual contributor roles, he has advocated for Continuous Delivery, Test-Driven Development, and Mob Programming. Sometimes successfully. He still finds it strange to write about himself in the third person.", "public_name": "Markus Klein", "guid": "e40bce05-dd92-5bf3-bc02-8122f5768596", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GSRLAK/"}, {"code": "XG9398", "name": "Asya Melnik", "avatar": null, "biography": "I started as a data scientist, building ML microservices and deploying models into production. I later moved into a consulting role, where I helped adapt ML models to real customer needs, translate business problems into measurable objectives, interpret results, and monitor model performance over time.\r\n\r\nOver the years, my work gradually shifted towards GenAI. I now design and build AI agents from scratch for internal process optimisation, support colleagues in adopting GenAI and agentic AI responsibly, and promote security-aware practices in solution development. A large part of my work focuses on evaluating and monitoring agent behaviour in real environments to ensure these systems remain useful, safe, and trustworthy after deployment.", "public_name": "Asya Melnik", "guid": "2df8866d-ad5e-51b4-9809-794113a900c2", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/XG9398/"}, {"code": "ZUECPC", "name": "Serhii Sokolenko", "avatar": "https://pretalx.com/media/avatars/ZUECPC_sFofzBE.webp", "biography": "Serhii Sokolenko is a co-founder of Tower, a Pythonic platform for data flows and agents running on top of open analytical storage. 
Prior to founding Tower, Serhii worked at Databricks, Snowflake and Google on data processing and databases.", "public_name": "Serhii Sokolenko", "guid": "c94dd5e4-64fe-536d-bfa5-f451831b6f65", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ZUECPC/"}, {"code": "FZKG9N", "name": "Ines Montani", "avatar": "https://pretalx.com/media/avatars/FZKG9N_7il65fA.webp", "biography": "Ines Montani is a developer specializing in tools for AI and NLP technology. She\u2019s the co-founder and CEO of [Explosion](https://explosion.ai) and a core developer of [spaCy](https://spacy.io), a popular open-source library for Natural Language Processing in Python, and [Prodigy](https://prodi.gy), a modern annotation tool for creating training data for machine learning models.", "public_name": "Ines Montani", "guid": "b60e58b3-bd41-534c-a286-22ae8481a00a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FZKG9N/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/WAJQR7/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/WAJQR7/", "attachments": []}, {"guid": "b7367598-48db-5122-8d36-849a13e18e87", "code": "NXEVSE", "id": 95766, "logo": null, "date": "2026-04-14T17:50:00+02:00", "start": "17:50", "duration": "01:10", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95766-lightning-talks-1", "url": "https://pretalx.com/pyconde-pydata-2026/talk/NXEVSE/", "title": "Lightning Talks 1", "subtitle": "", "track": null, "type": "Lightning Talks", "language": "en", "abstract": "Lightning Talks 1", "description": "Lightning Talks 1", "recording_license": "", "do_not_record": false, "persons": [], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/NXEVSE/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/NXEVSE/", "attachments": []}], "Titanium [2nd Floor]": [{"guid": "93b53236-082d-5c2d-a4cf-e698424f16ff", "code": "JJDCW3", "id": 87712, "logo": null, "date": 
"2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87712-python-hates-being-pid-1-writing-container-aware-code-for-kubernetes", "url": "https://pretalx.com/pyconde-pydata-2026/talk/JJDCW3/", "title": "Python Hates Being PID 1: Writing Container-Aware Code for Kubernetes", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "On Kubernetes, your Python app runs in a hostile environment, fighting for resources in a straitjacket, bombarded with signals, and being killed and ruthlessly dragged back to life time and again. This is in stark contrast to the wonderful weather of a Linux web server or the blissful utopia of localhost. If not hardened properly, your Python app will find the burden of being containerized too hard to bear. And the result? Zombies!\r\n\r\nWhether you are a Kubernetes expert, or you just deployed your first containerized Hello World, we will together explore how the Python Interpreter, the Linux Kernel and Kubernetes interact with each other.\r\n\r\nWe will uncover why Python struggles as an init process, how Kubernetes CPU-limits fight the Global Interpreter Lock (GIL) and why Python\u2019s Garbage Collector cannot save you from sudden OOM kills. Most importantly, we will see how to identify, debug, and avoid containerized Python pitfalls. The goal of this talk is to help you stop treating your container like a server and learn to write Cloud-Native Python that knows exactly where it lives.", "description": "**The Problem** : The large-scale adoption of Kubernetes means more Python developers are now writing code that runs as a containerized workload on Kubernetes. However, most of us still write applications with a standard Linux server in mind. In a containerized environment, these assumptions are either untrue or dangerous. 
Python apps not hardened for a containerized environment lead to production failures that are notoriously hard to debug:\r\n- Unexplained Latency: API requests that stall for hundreds of milliseconds due to Linux CFS Quota throttling, even when monitoring shows low CPU usage.\r\n- Silent OOM Kills: Containers that vanish instantly without a traceback because they hit a Cgroup limit that the Python Garbage Collector cannot see.\r\n- Zombie Processes: Subprocesses that were never truly killed and are now exhausting the process table because Python ignores its duties as PID 1.\r\n\r\n**The Solution** : This talk will briefly get you up to speed with containerization before taking a technical deep dive into the interactions between Kubernetes, the CPython interpreter and the Linux container runtime. We will move beyond basic Dockerfile best practices and focus on hardening the application code itself to survive in a hostile Kubernetes environment.\r\n\r\n**Pre-requisites** : This talk is aimed towards intermediate to senior Python Developers and Data Engineers having basic familiarity with Docker. No advanced Kubernetes or Linux Kernel knowledge required, we will run through the foundational topics in brief.\r\n\r\n**Outline (30 Minutes)**\r\n1. Who am I? (2 mins)\r\n2. The Lie of the Container (3 mins)\r\n    - Understanding how the container runtime isolates your process and the resources it needs.\r\n3. The PID 1 Problem (4 mins)\r\n    - How the Linux kernel treats PID 1 processes and why the standard Python interpreter fails these duties.\r\n    - Present well established solutions to the problem (init: true, tini, etc) and common pitfalls.\r\n4. The CPU Quota & Memory Limit (8 mins)\r\n    - How container CPU limits in Kubernetes translate to Linux CFS (Completely Fair Scheduler) quotas. 
\r\n    - Visualizing how the enforcement of CFS quotas interacts with the Python GIL to cause latency spikes.\r\n    - Python\u2019s memory management and the dreaded OOM kill.\r\n5. Hardening your Python Code (8 mins)\r\n    - How to use the Cgroup file system or psutil to achieve true resource awareness.\r\n    - Strategies for avoiding CPU throttling and tuning numeric libraries (Pandas/Numpy) from attempting to use too many cores.\r\n    - Why `gc.collect()` is often insufficient and how to release memory before the OOM killer strikes.\r\n6. Conclusion & Checklist (5 mins)\r\n    - A \"Production-Ready\" checklist for Python on K8s.\r\n    - Q&A.\r\n\r\n**After this talk you will** :\r\n- Understand the lifecycle of a containerized Python app and handle shutdowns gracefully.\r\n- Fine-tune a containerized Python app for stability and avoid CPU throttling and OOM kills.\r\n- Look beyond the standard system calls to write truly resource aware Python apps.", "recording_license": "", "do_not_record": false, "persons": [{"code": "JLP8VR", "name": "Kavish Nareshchandra Dahekar", "avatar": "https://pretalx.com/media/avatars/JLP8VR_jXDK7Qp.webp", "biography": "I am a Senior Developer at SAP in Berlin. I've spent the last 8 years of my career at SAP starting with SAP's ML Foundation, DataHub, Data Intelligence and now working for AI Core. I specialise in scalable, cloud-native microservices and AI orchestration platforms. My current work focuses on developing SAP's high-availability distributed AI platform. I hold a Masters in Computer Science from IIT Guwahati, with a specialised research focus on NLP. I am also a Certified Kubernetes Administrator (CKA) and a Certified Kubernetes Security Specialist (CKS). 
I love to teach and in my free time love playing the guitar or working on hobby electronics projects.", "public_name": "Kavish Nareshchandra Dahekar", "guid": "7f4d43a2-6420-5f69-994f-169bebb6f6c6", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/JLP8VR/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/JJDCW3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/JJDCW3/", "attachments": [{"title": "pycon-26-python-pid1-slides-dark", "url": "/media/pyconde-pydata-2026/submissions/JJDCW3/resources/pycon-_vWQRYOZ.pdf", "type": "related"}]}, {"guid": "42a4ea57-a626-541b-ac02-a513ff28d948", "code": "BQYTVM", "id": 85963, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:45", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-85963-beyond-stateless-why-your-web-service-architecture-is-fighting-against-performance", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BQYTVM/", "title": "Beyond Stateless: Why Your Web Service Architecture is Fighting Against Performance", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk (long)", "language": "en", "abstract": "We've been told for years that stateless services are the holy grail of\r\nscalable web architectures. But what if this foundational principle is\r\nactually hurting development speed and runtime performance?\r\n\r\nCoding agents follow our example. They do what we would have done, only\r\n10 times more. They also apply the \"stateless is good\" myth.\r\n\r\nThis talk challenges the dominant paradigm by demonstrating how\r\nstateful, object-oriented programming can automatically scale to\r\nmillions of users without the typical infrastructure complexity.\r\n\r\nI'll show how keeping objects with their state in distributed memory\r\neliminates the need for explicit caching strategies, reduces database\r\nbottlenecks, and dramatically simplifies your code base. 
You'll see how\r\na simple Python class can transparently scale across multiple servers,\r\nhandling millions of concurrent users without implementing REST\r\nendpoints, message queues, or cache invalidation logic.\r\n\r\nSo you can guide your agent to do scalability the right way.", "description": "## The Problem Many Face\r\n\r\nEvery developer of a successful web service knows this progression: You start with a simple FastAPI or Django app. It works great locally. Then you deploy it, traffic grows, and suddenly you're working primarily on infrastructure complexity. Load balancers, cache layers, database replicas, message queues, and before you know it, your simple microservice based business logic has become a complex distributed system mesh including careful cache invalidation logic.\r\n\r\nBut what if this complexity isn't inevitable? What if it's actually the result of a historical mistake that became \"best practice\"?\r\n\r\n## Challenging the Stateless Dogma\r\n\r\nThis talk challenges a fundamental assumption of modern web architecture: that stateless services are superior for scalability. I'll demonstrate that this belief, born from the constraints of early web servers, is now actively harmful to both performance and developer productivity. The truth is: separating logic from state (the core of stateless architecture) creates most of the complexity we fight daily. 
Every database query, every cache lookup, every message queue: they're all workarounds for the fact that we threw away our object's state after each request.\r\n\r\n## Key Takeaways\r\n\r\n- Stateless isn't a virtue, it's a workaround: modern systems can and should maintain state efficiently across requests.\r\n- Your objects can be the cache: when objects persist in distributed memory, explicit caching becomes redundant.\r\n- Scale by writing normal Python code: the same object-oriented patterns work from prototype to web-scale.\r\n- Performance through simplicity: eliminating layers of infrastructure translation improves both latency and throughput.\r\n- Focus on business logic, not plumbing: let the framework handle distribution, persistence, and failover.\r\n\r\n## Who Should Attend\r\n\r\nPython developers who:\r\n- are building or maintaining web services,\r\n- have experienced the pain of cache invalidation,\r\n- want to scale without changing their programming model,\r\n- are curious about alternatives to microservices.\r\n\r\n## A Paradigm Shift\r\n\r\nJust as we moved from manual memory management to garbage collection, it's time to move on from manual state management. Your Python objects should live as long as they're needed, not just for the duration of a request. This isn't theoretical. Systems using this approach power gaming platforms with millions of concurrent users, financial systems requiring microsecond latency, and IoT platforms managing billions of devices. The technology exists. We just need to unlearn the \"stateless is good\" mantra.", "recording_license": "", "do_not_record": false, "persons": [{"code": "P9MRFG", "name": "Heiner Wolf", "avatar": "https://pretalx.com/media/avatars/P9MRFG_mLBtG7O.webp", "biography": "Heiner Wolf is a physicist and coder. After completing his Master\u2019s degree in particle physics at CERN, he got a PhD in computer science and is now a passionate full stack developer (C#, TypeScript, Python). 
Heiner has been CTO for many years, in his own startups and those of others. Alongside all sorts of good stories, he enjoys realistic future scenarios and hard science fiction. And when triggered on physics, he\u2019ll gladly rant about how fusion research should really be done.", "public_name": "Heiner Wolf", "guid": "e42901a5-e9c4-5654-8550-8d144b226cfd", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/P9MRFG/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BQYTVM/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BQYTVM/", "attachments": [{"title": "Beyond Stateless Heiner Wolf PyCon DE 2026", "url": "/media/pyconde-pydata-2026/submissions/BQYTVM/resources/260414_4ekL2yM.pdf", "type": "related"}]}, {"guid": "f29abfcf-da57-535f-8f7e-373c50ea4a6a", "code": "7H9DF8", "id": 88345, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-88345-how-to-mix-conda-and-pip-without-causing-environmental-damage", "url": "https://pretalx.com/pyconde-pydata-2026/talk/7H9DF8/", "title": "How to mix conda and pip without causing \u201cenvironmental\u201d damage.", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Ever mixed conda and pip and ended up with a broken conda environment, yet, swear it worked before? This talk explains why! Learn the difference between pip and conda, what happens when you mix them and how to combine them safely using the latest community developed tools and updates in conda.", "description": "Users frequently run `pip` inside their `conda` environments, sometimes successfully, sometimes with unintentional consequences. Confusing errors and broken environments often lead users to ask: when is it safe to use `pip` in a `conda` environment, and when is it not?\r\n\r\nIn this presentation I will answer this question. 
\r\n\r\nI will begin by discussing the differences between `pip` and `conda` (a question conda maintainers get asked a lot!), starting with the specific use-cases of both tools. \r\nThis will include an \u201cenlightenment\u201d moment: `pip` and `conda` solve slightly different problems, one is a Python package installer, the other is a language agnostic package and environment manager.\r\n\r\nI will then explain the differences between `.conda` packages, tarballs, and Python wheels, revealing how these format differences make interoperability difficult and mixing tools unreliable.\r\nUsers end up mixing `pip` and `conda` because sometimes the packaging ecosystem leaves them no other choice. Users often report, \"I tried installing a package  with `conda`, but it didn't work, so I ran `pip install` instead and it worked\u201d. This mixing, sadly, has consequences, which I refer to as \u201cenvironmental damage\u201d. \r\nI will highlight this damage in my talk. \r\n\r\n`pip` and `conda` are two separate ecosystems but over time many community efforts (most recent being `conda-pypi`),  have tried to improve interoperability. I will explain how the latest updates in `conda` along with the features in `conda-pypi` have now made it possible to `conda install` Python wheels from PyPI directly into `conda` environments. Thereby bringing us a step closer to better interoperability. \r\n\r\nI will conclude the presentation with best-practice recommendations for using `pip` and `conda` together. 
\r\nBy the end of this presentation, users will have learned when to use `pip`, when to use `conda`, why they are different and how to combine them safely.\r\n\r\n\r\nHere is a link to the conda-pypi repository on GitHub: https://github.com/conda-incubator/conda-pypi \r\n\r\nTime outline of the presentation:\r\n3 mins- self introduction and introduction to the topic (what to expect)\r\n5 mins- difference between pip and conda and their use cases\r\n10 mins- different package formats, problems with mixing pip and conda\r\n5 mins- wheels support feature in conda-pypi and updates in conda\r\n5 mins- how it helps users and best practices \r\n2 mins- closing remarks", "recording_license": "", "do_not_record": false, "persons": [{"code": "GY9UGD", "name": "Mahe Iram Khan", "avatar": "https://pretalx.com/media/avatars/GY9UGD_fNKT9F4.webp", "biography": "I am a Software Engineer working at Anaconda. I have been working on the conda project for more than 3 years. My hobbies are crocheting, writing and cooking.", "public_name": "Mahe Iram Khan", "guid": "708604fa-1d83-5e45-b85b-f62be2dfd286", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GY9UGD/"}], "links": [{"title": "Slides", "url": "https://docs.google.com/presentation/d/1XDSMG0V0zQByrjuL2H3YA3hqEYOvSsXqC5uDZLXwp-8/edit?usp=sharing", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/7H9DF8/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/7H9DF8/", "attachments": [{"title": "slide pdf", "url": "/media/pyconde-pydata-2026/submissions/7H9DF8/resources/PyConD_NfnJNlB.pdf", "type": "related"}]}, {"guid": "bc05aea9-d3d8-5375-bbe2-f1a721b15fb8", "code": "YKQ33N", "id": 87217, "logo": null, "date": "2026-04-14T15:10:00+02:00", "start": "15:10", "duration": "00:45", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87217-destructive-testing-10-practical-ways-to-expose-hidden-application-risks", "url": 
"https://pretalx.com/pyconde-pydata-2026/talk/YKQ33N/", "title": "Destructive Testing: 10 Practical Ways to Expose Hidden Application Risks", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk (long)", "language": "en", "abstract": "Modern applications rarely fail in obvious ways. Instead, they break at the edges: unexpected inputs, race conditions, misused APIs, and assumptions nobody realized they were making. This talk presents ten practical and repeatable ways to intentionally break an application, using a QA mindset with a strong Python focus.\r\n\r\nThe session is designed to help QAs sharpen their investigative approach and move beyond happy-path testing, while giving developers concrete insight into where real-world failures often originate. Each \u201cway to break an application\u201d highlights a common risk area such as data handling, state management, timing, configuration, or integration boundaries.\r\n\r\nAttendees will learn how to think more destructively (in a productive way), design better tests, and recognize fragile design decisions earlier. The goal is not to assign blame, but to improve collaboration and software quality by understanding how systems actually fail in practice.", "description": "Quality assurance is not about confirming that software works \u2014 it is about discovering how it fails. This talk explores ten concrete ways to break an application on purpose, based on real-world testing patterns and common failure modes seen in modern software systems.\r\n\r\nThe focus is on practical thinking, not theory. While Python is used as the primary example language for test automation and experimentation, the concepts apply to any technology stack. 
The session is relevant for QAs, test engineers, and developers who want to build more resilient systems and improve cross-discipline collaboration.\r\n\r\nGoals of the Talk\r\n* Improve destructive testing and exploratory thinking for QAs\r\n* Help developers understand common blind spots in application design\r\n* Demonstrate how Python can be used effectively to probe system weaknesses\r\n* Encourage a shared quality mindset across roles", "recording_license": "", "do_not_record": false, "persons": [{"code": "WMUYJV", "name": "Pascal Puchtler", "avatar": "https://pretalx.com/media/avatars/WMUYJV_mV9O9T4.webp", "biography": "An experienced QA engineer and software developer with a strong focus on Python-based testing and test automation. Specialized in breaking applications through exploratory, risk-driven, and destructive testing approaches. With several years of experience working on complex software systems, the focus is on uncovering hidden failure modes, improving test strategies, and helping teams build more resilient applications. 
Passionate about bridging the gap between QA and development by sharing practical insights into how and why software fails in real-world scenarios.", "public_name": "Pascal Puchtler", "guid": "dcade1d5-0d8a-5ee3-9606-57562490ed24", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/WMUYJV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/YKQ33N/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/YKQ33N/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/YKQ33N/resources/Pascal_NgdxFS5.pdf", "type": "related"}]}, {"guid": "91e12851-20ad-53c0-bf91-8b1fa92ea978", "code": "HKFCBM", "id": 87148, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87148-pair-share-how-formal-mentoring-pushed-rewe-analytics-to-a-new-level", "url": "https://pretalx.com/pyconde-pydata-2026/talk/HKFCBM/", "title": "Pair & Share: How formal Mentoring pushed REWE Analytics to a new level", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "As one of Europe\u2019s largest retail corporations, REWE Group owns and manages prominent supermarket chains such as REWE and PENNY, among many other subsidiaries. In this talk I will give a brief overview of how we introduced a formal mentoring program, Pair & Share, at the central analytics department of REWE Group with its more than 150 data scientists, engineers, analysts and other colleagues. \r\n\r\nBefore Pair and Share, there was no formal process for personal, technical or methodological growth. Although there are plenty of possibilities, further training and education was self-organized and fragmented. To increase growth among our colleagues and build and strengthen inter-team exchange, we introduced the formal mentoring program, Pair & Share. 
\r\n\r\nThis talk will cover a brief overview of REWE Group and our analytics department followed by a motivation for Pair & Share. Afterwards I will explain how we planned the mentoring program and defined the parameters like the matching process, the time frame and how to recruit participants. I will also share my experiences of the first six months of mentoring, what kind of roadblocks but also pleasant surprises we encountered. The talk will be concluded with an outline of how we plan to continue and improve the program.", "description": "Did you ever wonder how to bring your analytics department to the next level? Do you want to help colleagues to network, learn or pass on their knowledge? And did you ever want to start your own mentoring program in a large corporation? Think no more, as I will describe in detail how we set up a mentoring program, Pair & Share, in REWE Group\u2019s analytics department, with its 150 data scientists, data engineers, analysts and other data people. As one of Europe\u2019s largest retail corporations, REWE Group owns and manages prominent supermarket chains such as REWE and PENNY, among many other subsidiaries. However, before Pair and Share there was no formal process for personal, technical or methodological growth within REWE Analytics. Although there are plenty of possibilities, further training and education was self-organized and fragmented. To increase growth among our colleagues and build and strengthen inter-team exchange, we introduced the formal mentoring program, Pair & Share. \r\n\r\nThis talk will cover a brief overview of REWE Group and our analytics department, who we are and what we do. This is followed by a description of why, while we found personal growth and training to be fine, we realized that we could do better with Pair & Share. Afterwards, I will explain how we planned the details of the mentoring program and defined the parameters like the matching process, time frame and how to recruit participants. 
As the first iteration of mentoring comes to an end in March 2026, I will share my experiences of the first six months of mentoring. This will include the kind of roadblocks we faced, how participants shaped their own mentoring experience and what pleasant surprises we encountered. As we measured participant satisfaction with regular pulse checks as well as many feedback sessions, I will conclude the talk with an overview of what went well and how we plan to do better with the next iteration of mentoring.", "recording_license": "", "do_not_record": false, "persons": [{"code": "8CDX7B", "name": "Axel Buddendiek", "avatar": "https://pretalx.com/media/avatars/8CDX7B_dRnkSTC.webp", "biography": "Axel Buddendiek is an astronomer turned data scientist. After finishing his PhD in 2015, Axel started working in data teams at different companies, continuously learning and building up new skills. In 2022, he joined the Analytics Department at REWE Group as a senior data scientist. When Axel is not at work, he enjoys jogging, reading, and watching football.", "public_name": "Axel Buddendiek", "guid": "fd4f6184-563d-5031-bea4-c0972248115b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8CDX7B/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/HKFCBM/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/HKFCBM/", "attachments": []}, {"guid": "8ad841eb-187f-510a-8982-03fb514b5414", "code": "U9KQU9", "id": 87635, "logo": null, "date": "2026-04-14T17:10:00+02:00", "start": "17:10", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87635-building-trust-in-your-data-pipelines-with-observability", "url": "https://pretalx.com/pyconde-pydata-2026/talk/U9KQU9/", "title": "Building Trust in Your Data Pipelines with Observability", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "In the daily work of a data engineer, building new data 
pipelines often takes priority, while maintaining them and ensuring their correctness becomes an afterthought. This focus can quickly turn into a pitfall: failures go undetected, incorrect data silently propagates, and complaints from stakeholders arrive before engineers notice any issues. In practice, incorporating observability into every new data pipeline helps avoid these problems and enables teams to steadily increase system complexity while maintaining trust and peace of mind.\r\n\r\nIn this talk, I introduce observability in the context of data pipelines, covering its three core pillars: metrics, alarms, and logs. We will explore concepts like the four golden signals, alarm fatigue and structured logging and how they apply to data pipelines. I will show easy to implement first steps and share real-world experiences, where improved observability helped uncover previously unknown incorrect behavior and build trust in data systems.\r\n\r\nThis talk is well suited for data engineers that had little exposure to observability and want to learn about strategies how to keep sane while managing a jungle of pipelines.", "description": "This talk explores how observability can be applied to data pipelines to improve reliability, data quality, and confidence in complex data systems.\r\n\r\nThe talk begins with an introduction to observability in the context of data engineering. It explains the three core pillars: metrics, alarms, and logs, and discusses why observability is particularly important for data pipelines, where failures are often silent and correctness issues may only surface through stakeholder complaints. \r\n\r\nThe first section focuses on metrics. It demonstrates how straightforward it can be to instrument data pipelines with basic metrics using Python. The talk then discusses which metrics are worth monitoring, adapting established concepts such as the four golden signals to data engineering use cases. 
A concrete example based on a near\u2013real-time event processing pipeline illustrates how fine-grained metrics can reveal systematic failures for specific event types.\r\n\r\nThe second section focuses on alerting. It addresses the challenge that engineers rarely have time to continuously inspect dashboards and therefore rely on alarms to surface important issues. The talk outlines what makes a good alarm, emphasizing that alarms should be actionable, reliable, and provide sufficient context for investigation. A scenario with excessive and noisy alarms is used to illustrate alarm fatigue and a strategy for getting out of such a situation is described.\r\n\r\nThe final section covers log messages and their importance for reasoning about how a pipeline ended up in a specific state. It discusses why logs are often difficult to work with in data pipelines, as they may contain a mixture of critical errors, informational messages, and low-level framework output. The talk introduces structured logging as a way to add context and make logs easier to search, filter, and aggregate. Examples include monitoring the distribution of log levels to uncover hidden issues and using centralized logging to identify dependencies between pipelines that are otherwise hard to detect.\r\n\r\nThe talk concludes by emphasizing how the three pillars of observability build trust in a data pipeline.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NPTXJK", "name": "Stefan Dienst", "avatar": "https://pretalx.com/media/avatars/NPTXJK_Z7Vt13k.webp", "biography": "Stefan is a data engineer and works at Covestro in a newly established data office. He has four years of experience working on a variety of data platforms, ranging from classic ETL pipelines and data warehousing to near\u2013real-time stream processing. Before moving into data engineering, he completed a PhD in physics, where he fell in love with Python and working with data. 
Since then, he has always been curious to learn new things and share what he has learned with others.", "public_name": "Stefan Dienst", "guid": "6cbb46f7-afc5-5777-aa39-3938ef4319c4", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NPTXJK/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/U9KQU9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/U9KQU9/", "attachments": [{"title": "The slides I will use in my presentation as a PDF.", "url": "/media/pyconde-pydata-2026/submissions/U9KQU9/resources/observ_xJg36WF.pdf", "type": "related"}]}], "Helium [3rd Floor]": [{"guid": "0b164724-3d96-5fc3-8ec0-9affeeb97343", "code": "PFXR9G", "id": 87194, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:45", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87194-fight-your-garbage-data-implementation-of-a-pythonic-data-quality-monitoring-framework-in-pyspark", "url": "https://pretalx.com/pyconde-pydata-2026/talk/PFXR9G/", "title": "Fight your garbage data: implementation of a pythonic data quality monitoring framework in PySpark", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk (long)", "language": "en", "abstract": "The timeless phrase \u201cgarbage in, garbage out\u201d is even more important today with the growing usage of non-deterministic generative neural networks, which amplifies the effect of bad data quality. This presentation describes Data Quality Monitor \u2014 a tool to bring transparency into data quality and help drive real improvements. \r\n\r\nIn the talk, we'll cover what defines a successful data quality monitoring solution and share findings from our initial evaluation of available open-source frameworks. Next, we'll showcase our implementation based on DQX. DQX is a lightweight, open-source framework for performing row-level data quality checks programmatically, with business rules organized in manageable YAML files. 
DQX, originally developed by Databricks Labs, integrates seamlessly with PySpark, making it easy and affordable to run data quality checks within our IoT data lake. Finally, we will discuss the organizational processes and structures required to effectively respond to data quality issues.", "description": "In the talk we share our experience from the project implemented in Q3 2025. We start with the motivation for the project, involved stakeholders and their needs. We will then define the criteria for a successful data quality monitoring solution and share findings from our evaluation of existing frameworks. We will also discuss why popular frameworks like Great Expectations or SODA did not meet our requirements. \r\n\r\nNext, we will demonstrate our implementation based on DQX\u2014a lightweight, open-source Python library designed for traceable, row-level data quality checks before and after data is persisted. DQX, developed and maintained by Databricks Labs, allows developers to concentrate on the core implementation while providing business users with YAML files for maintenance of business rules. Furthermore, DQX\u2019s seamless integration with PySpark enables efficient and cost-effective quality monitoring within our IoT data lake. \r\n\r\nFinally, we move beyond the code to the organisational reality. We will discuss how we embedded Data Quality Monitor into the organisation and share our opinion on the hard questions: who is responsible for maintaining rules? who monitors the results? 
\r\n\r\n**Talk outline** \r\n\r\n* Motivation for the project \r\n\r\n     * Initial situation and objectives   \r\n\r\n* Framework evaluation \r\n\r\n     * Evaluation criteria for a successful data quality monitoring \r\n\r\n     * Comparison of available frameworks \r\n\r\n* Our implementation with DQX \r\n\r\n     * How to use built-in data quality checks \r\n\r\n     * How to add custom data quality checks \r\n\r\n     * Automated rule generation with DQX Profiler \r\n\r\n     * Output and visualisation options \r\n\r\n     * Python project structure \r\n\r\n* Embedding in organisation \r\n\r\n     * Rule maintenance \r\n\r\n     * How to communicate data quality issues \r\n\r\n* Summary  \r\n\r\n \r\n\r\n**Key takeaways** \r\n\r\n* Understanding of most important criteria when choosing the framework for data quality monitoring from perspective of a data engineer and an architect \r\n\r\n* Understanding of DQX framework \r\n\r\n* Ideas how to integrate data quality monitoring into organisations.", "recording_license": "", "do_not_record": false, "persons": [{"code": "7LKG3C", "name": "Rostislaw Krassow", "avatar": "https://pretalx.com/media/avatars/7LKG3C_38IhA6u.webp", "biography": "Rostislaw, a data architect at RATIONAL AG, specializes in distributed databases, the Apache Hadoop ecosystem and Azure cloud. He leverages his expertise to maintain the enterprise Data & Analytics platform for IoT data, where his daily work involves reconciling diverse stakeholder perspectives to deliver sustainable solutions.", "public_name": "Rostislaw Krassow", "guid": "71ebf006-4a4d-5dee-9f40-ea3970e419c9", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/7LKG3C/"}, {"code": "SZXX8F", "name": "Joshua Finger", "avatar": "https://pretalx.com/media/avatars/SZXX8F_OyUGGE7.webp", "biography": "Joshua is a Data Engineer at inovex GmbH dedicated to building robust, scalable data products. 
Utilizing his foundation as a Full Stack Software Engineer, he applies rigorous software engineering principles to ensure every data solution is high-quality, maintainable, and efficient.", "public_name": "Joshua Finger", "guid": "f025ed88-d388-5ab5-8e72-fea9897ae116", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/SZXX8F/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/PFXR9G/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/PFXR9G/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/PFXR9G/resources/PyData_eg6wqDt.pdf", "type": "related"}]}, {"guid": "80400987-b959-5b3e-b230-f9625ceec70e", "code": "WSNBD9", "id": 87896, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87896-hype-hope-or-headache-making-sense-of-genai-llms-and-ai-agents-with-anecdotal-evidence", "url": "https://pretalx.com/pyconde-pydata-2026/talk/WSNBD9/", "title": "Hype, Hope, or Headache? Making Sense of GenAI, LLMs, and AI Agents with Anecdotal Evidence", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "After nearly 20 years in data science, from MLPs, SVMs, and random forests to deep learning, I\u2019ve seen many \u201crevolutions\u201d come and go. The current tectonic shift around GenAI and LLMs feels different from previous hype cycles. Even with some understanding how these things work, I am still blown away by the stream of stunning new capabilities. But they also introduce new kinds of risks that go far beyond technical performance. This talk offers a pragmatic, experience-driven perspective on GenAI in industrial settings, including supply chains and the emerging wave of AI agents. We\u2019ll disentangle real opportunities from snake oil, especially where hype-driven promises meet senior management expectations. 
An anti-bullshit take on the possibilities ahead, with honesty, anecdotes, and (for those who know me, of course) a bit of humor.", "description": "After nearly 20 years in data science I\u2019ve seen many \u201crevolutions\u201d come and go: neural networks, SVMs, bayesian statistics, random forests, XGBoost and deep learning. Each came with bold promises, and each eventually settled into a realistic place in production systems (read: became boring). Generative AI, however, feels fundamentally different.\r\n\r\nIn this talk, I\u2019ll share my view *why* the current GenAI hype stands apart from previous cycles: technically, culturally, and organizationally. Even with some understanding how these things work, I am still blown away by the stream of stunning new capabilities. This is not a \u201cGenAI is bad\u201d rant. Instead, it\u2019s a critical attempt to understand the shift we\u2019re seeing, and the risks that come with it if we don\u2019t adjust our thinking.\r\n\r\nUsing industrial examples such as supply chains (just because I work in this field), but also personal experience, I\u2019ll show where LLM-based approaches still have serious limitations today, and where GenAI can realistically add value. We\u2019ll disentangle different categories of risk from technical fragility, evaluation problems and mere costs to organizational overconfidence and misuse.\r\n\r\nA big part of the talk dives into the rapidly emerging field of AI Agents. We\u2019ll explore what AI agents actually are, where they make sense today, and where the current hype is just snake oil, particularly to senior decision-makers who may underestimate complexity, costs, and failure modes.\r\n\r\nThe goal of this talk is not to slow innovation, but to enable better decisions. If we want GenAI to be a success in real-world systems, we need to understand both the change it represents and the limits it still has. 
\r\n\r\nAn anti-bullshit take on the possibilities ahead, with honesty, anecdotes, and (for those who know me, of course) a bit of humor.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NEBKEY", "name": "Sebastian Neubauer", "avatar": "https://pretalx.com/media/avatars/NEBKEY_9ioAcAl.webp", "biography": "Data scientist forever; Worked everywhere in Blue Yonder, messed with data science, built platforms, now exploring GenAI & AI agents. Known to always ask the question nobody else dared.", "public_name": "Sebastian Neubauer", "guid": "461acdf1-0d58-52ef-a400-0a008b4a35eb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NEBKEY/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/WSNBD9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/WSNBD9/", "attachments": []}, {"guid": "3c141c5a-6158-5448-af03-9047a1d1069a", "code": "HBFL78", "id": 87825, "logo": null, "date": "2026-04-14T15:10:00+02:00", "start": "15:10", "duration": "00:45", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87825-demystifying-parallel-programming-in-python-from-cpu-to-quantum-processors-including-gpu-and-tpu", "url": "https://pretalx.com/pyconde-pydata-2026/talk/HBFL78/", "title": "Demystifying Parallel Programming in Python: from CPU to quantum processors, including GPU and TPU", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk (long)", "language": "en", "abstract": "This talk provides a beginner-friendly overview of Python\u2019s parallel programming ecosystem. 
You\u2019ll discover the key libraries and techniques\u2014JIT compilation, multithreading, multiprocessing, distributed computing, HPC/grid computing, and even a first look at quantum programming\u2014to help you write faster, more efficient code, regardless of your hardware.", "description": "# Demystifying Parallel Programming in Python\r\n\r\n## Understanding the Hardware Basics\r\n\r\n* A gentle introduction to modern processors: What are CPUs, GPUs, TPUs, and quantum processors?\r\n* Essential terminology explained: cores, hyper-threading, cache memory, multithreading, multiprocessing, multitasking, SIMD, NUMA, and more\u2014no prior knowledge required!\r\n\r\n## Parallel Programming Techniques for Beginners\r\nA practical overview of Python\u2019s parallel programming tools, organized by approach:\r\n\r\n* Just-In-Time (JIT) compilation: Speed up your code without changing your workflow\r\n* Multithreading: Do more at once, and removing the GIL with Python 3.13+\r\n* Multiprocessing: Use all your CPU cores\r\n* Distributed computing: Scale your code across multiple machines\r\n* Quantum programming: A first look at the future of computing\r\n\r\n## Hands-On Examples\r\n\r\n* JIT compilation made easy: PyPy, Numba, and JAX\r\n* The GIL and Python 3.13: What\u2019s changing and why it matters\r\n* Distributed computing for everyone: Celery and Dask on HPC clusters\r\n* GPU computing for beginners: CuPy, cuDF, and Numba\r\n* Your first quantum \u201cHello World\u201d: A taste of the quantum revolution\r\n\r\n## Conclusion\r\n\r\nBy the end of this talk, you\u2019ll have a clear map of Python\u2019s parallel programming landscape.\r\nNo experience needed\u2014just bring your curiosity and let\u2019s explore together!", "recording_license": "", "do_not_record": false, "persons": [{"code": "HYCGGE", "name": "Ga\u00ebl Pegliasco", "avatar": "https://pretalx.com/media/avatars/HYCGGE_ncfzjF3.webp", "biography": "Python Developer & Trainer\r\nSpecializing in Machine 
Learning and Parallel Computing with NumPy, Pandas, Scikit-Learn, TensorFlow, PyTorch, MPI, Dask, and more.", "public_name": "Ga\u00ebl Pegliasco", "guid": "0b00da41-57e1-54a0-95bb-bb813dded3a3", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HYCGGE/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/HBFL78/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/HBFL78/", "attachments": [{"title": "Talk presentation", "url": "/media/pyconde-pydata-2026/submissions/HBFL78/resources/Presen_3Au7uX7.pdf", "type": "related"}, {"title": "PresentationNotebooks", "url": "/media/pyconde-pydata-2026/submissions/HBFL78/resources/pycon-_TGn5YUM.zip", "type": "related"}]}, {"guid": "324a192a-1773-58e2-983c-5288591b625a", "code": "PK8XNB", "id": 88343, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-88343-come-for-the-code-stay-for-the-people", "url": "https://pretalx.com/pyconde-pydata-2026/talk/PK8XNB/", "title": "Come for the Code, Stay for the People.", "subtitle": "", "track": "General: Community & Diversity", "type": "Talk", "language": "en", "abstract": "\"Come for the language, stay for the community.\" If you've been around Python long enough, you've heard this before. I don't know when I first heard it, but I know exactly when I understood it.\r\n\r\nThis talk is a personal reflection on seventeen years within the Python community\u2014from my first tentative steps as a volunteer to organising conferences myself. It's a story about discovering that Python was always about more than code. It's about the people, the values, and the unexpected ways a community can shape a career and a life.\r\n\r\nThis isn't just my story. It's a story I've seen repeated in countless faces at registration desks, in hallway conversations, in first-time speakers finding their voice. 
I want to talk about what I've learned about kindness, mentorship, and the quiet power of feeling like you belong somewhere.\r\n\r\nI'll end with an open question: as the ways we connect continue to evolve, how do we preserve what matters while welcoming a new generation?\r\n\r\nIf you're new to this community and wondering what all the fuss is about, this talk is especially for you.", "description": "I have a confession to make: after seventeen years in the Python community and countless technical talks attended and organized, this is the first time I'm putting myself out there to talk about community itself. It feels vulnerable. It feels necessary.\r\n\r\n**How it started**\r\n\r\nMy journey began the way many do\u2014volunteering. Stuffing badge holders, directing people to rooms, answering the same question about Wi-Fi passwords a hundred times. It wasn't glamorous, but it was transformative. Volunteering was my first taste of what it means to contribute to something larger than myself, and it opened doors I didn't even know existed.\r\n\r\nBack then, I came for the code. I had no idea I'd stay for the people.\r\n\r\n**More than code**\r\n\r\nThrough seventeen years, the Python community taught me things I carry with me everywhere. Things that have nothing to do with syntax or libraries.\r\n\r\nThe value of patience and kindness when someone asks a \"basic\" question\u2014because we were all beginners once. The importance of explicit inclusion, because \"everyone is welcome\" means nothing without deliberate action. The power of mentorship, both giving and receiving. The understanding that community health requires active maintenance, not passive hope.\r\n\r\nThis is what \"stay for the people\" actually means.\r\n\r\n**Who this talk is for**\r\n\r\nHaving attended and organized Python conferences for years, I've noticed something consistent: there are always newcomers. 
People experiencing their first Python event, unsure of what to expect, wondering if they belong. This talk is for them.\r\n\r\nBut it's also for anyone thinking about community engagement\u2014whether in Developer Relations, open source maintainership, or simply as someone who cares about the spaces they inhabit.\r\n\r\n**Looking forward**\r\n\r\nI don't have all the answers. I want to end with questions rather than conclusions. How do we engage with a generation that communicates differently? How do we preserve depth in an age of fragmented attention? What can newcomers teach us about building community in ways we haven't imagined?\r\n\r\nMy hope is that this talk sparks conversations that continue long after I leave the stage. And honestly? I hope to revisit this topic in ten years and see how wrong\u2014or right\u2014we were.", "recording_license": "", "do_not_record": false, "persons": [{"code": "S3GNBU", "name": "Valerio Maggio", "avatar": "https://pretalx.com/media/avatars/S3GNBU_323cs4e.webp", "biography": "Valerio Maggio has been wandering around the Python community for thirteen years. He started as a volunteer, somehow ended up organising conferences like PyCon Italy, PyData, EuroPython, and EuroSciPy, and has given more talks than he can remember. He's a researcher and open-source contributor who cares about open science and good software practices. Also an unapologetic nerd\u2014the kind who plays D&D and still believes Magic: The Gathering was better when cards had proper frames and the stack was a new thing (_if you're a player too, you know what I mean_). 
He drinks unreasonable amounts of tea and coffee.", "public_name": "Valerio Maggio", "guid": "78939915-227f-5f14-99fd-52e1eac75300", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/S3GNBU/"}], "links": [{"title": "Slides", "url": "https://speakerdeck.com/leriomaggio/come-for-the-code-stay-for-the-commnunity", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/PK8XNB/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/PK8XNB/", "attachments": []}], "Platinum [2nd Floor]": [{"guid": "17f8f69c-f841-5fe3-a892-7615e5970ee9", "code": "EE39VN", "id": 87682, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87682-reaching-the-next-level-of-abstraction-meta-classes-and-what-they-enable", "url": "https://pretalx.com/pyconde-pydata-2026/talk/EE39VN/", "title": "Reaching the next level of abstraction: meta classes and what they enable", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Talk", "language": "en", "abstract": "Python is especially powerful due to its deep meta programming capabilities. In this talk, I give an overview of one example: meta classes. I show how you can use them to customize class creation, ensure data integrity, or define your own syntactic sugar for classes.", "description": "Python is accessible and easy, but what makes it especially fun and powerful are its deep meta programming capabilities. One salient example are meta classes, which allow us to deeply hook into the class creation process. But, they seem quite complex at first glance, which may have deterred you so far from exploring them. In my talk, I want to alleviate your uncertainty and give you concrete examples of how meta classes work and what they enable you to do. 
We will look at using them to customize class creation, ensure data integrity by adding custom validators, or defining custom syntactic sugar that reduces boilerplate.\r\n\r\nOutline:\r\n* Programming and meta programming\r\n* Everything is an object\r\n* Higher-order functions\r\n* Meta class basics: customizing class creation and enforcing constraints\r\n* Advanced example: custom syntactic sugar\r\n* With great power comes great responsibility", "recording_license": "", "do_not_record": false, "persons": [{"code": "8BUF9Y", "name": "Valentin Zieglmeier", "avatar": "https://pretalx.com/media/avatars/8BUF9Y_GIw5y71.webp", "biography": "I work as a software consultant at TNG Technology Consulting. Previously, I completed a doctorate (Dr. rer. nat.) in Computer Science at the Technical University of Munich (TUM) in the area of software engineering where I taught a course on advanced Python programming.", "public_name": "Valentin Zieglmeier", "guid": "21470214-fff1-5098-8d80-54620d1bed07", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8BUF9Y/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/EE39VN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/EE39VN/", "attachments": []}, {"guid": "d0c31730-e7e7-5599-9460-27ff4463037d", "code": "WQGXJ3", "id": 85992, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:45", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-85992-exploring-germany-s-urban-geography-with-census-and-openstreetmap-data", "url": "https://pretalx.com/pyconde-pydata-2026/talk/WQGXJ3/", "title": "Exploring Germany's Urban Geography with Census and OpenStreetMap Data", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk (long)", "language": "en", "abstract": "When conducting studies of the urban form, an important resource many researchers turn to is the massive OpenStreetMap dataset. 
But, as extensive as this dataset is, it lacks one very important aspect about the cities it covers: the people who live there. In this talk, I show you how to add this missing element to your research by bringing in German Census data to create rich analysis capable of answering some of the most pressing issues facing our cities today. I exemplify this by walking you through my own research in urban geography and sustainability with a study of how equitably distributed emergency care hospitals are in cities across Germany. Throughout, we look at how Python and PostgreSQL can be used as effective tools to enable this research and keep it organized.", "description": "By the end of this talk, audience members will be empowered with the tools they need to help identify and bring light to important problems affecting their cities. To achieve this, I show how to combine data on urban structure from OpenStreetMap and demographic data from the German Census in PostgreSQL. Once the data is gathered, I then show how to do the actual analysis and present the findings with Python.\r\n\r\nThe presentation will be broken up into the following sections:\r\n\r\n**Laying the foundation**\r\n\r\nThe first step is creating an organized database that will serve as the data source for the rest of the study. I show how to use \"PgOSM Flex\" for this plus a tool that I wrote in Python to make it easy to import German Census data into PostgreSQL.\r\n\r\n**Asking meaningful questions**\r\n\r\nWith all the data in place, it's time to formulate a research question to drive our analysis. Formulating a meaningful research question can keep our analysis on track and much better organized. To get there, we explore the data we have available and consider the types of questions we can actually answer.\r\n\r\n**Analyze and present**\r\n\r\nNow that we have a clear question in mind, we'll construct the queries we need to generate the data necessary for our analysis. 
Once exported from PostgreSQL, we perform the analysis and generate the final reports using popular scientific libraries in Python.\r\n\r\n**Final thoughts**\r\n\r\nTo conclude the talk, I share how this analysis could be extended by including even more datasets. I also discuss the limitations of these types of studies while offering practical advice on how you can make a positive impact with your research.", "recording_license": "", "do_not_record": false, "persons": [{"code": "HA3SES", "name": "Travis Hathaway", "avatar": "https://pretalx.com/media/avatars/HA3SES_DibVdln.webp", "biography": "Wearer of many hats, but some of my favorite are Python enthusiast, social science researcher and amateur musician. Currently based in Berlin, Germany where I work as a senior software engineer and am an active participant in the conda open source community. I'm also an organizer of the Python Users Berlin group. Feel free to reach out via LinkedIn!", "public_name": "Travis Hathaway", "guid": "f6266d43-60d4-53fa-9dd6-c2463925c2e6", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HA3SES/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/WQGXJ3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/WQGXJ3/", "attachments": []}, {"guid": "f29ad3a4-c4cc-5565-87a8-ceab973dbc6b", "code": "KBGXKC", "id": 94009, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-94009-making-my-apache-spark-talk-more-interesting-using-ai", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KBGXKC/", "title": "Making my Apache Spark\u2122 talk more interesting using AI", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Sponsored Talk", "language": "en", "abstract": "Writing talks is hard, but being a good conference speaker is even harder. 
Resultantly, this talk is recursive: I'll take a talk previously written for a London data science meetup on using Apache Spark and Apache Kafka to build ML data processing pipelines, and revamp it using Snowflake's Cortex Code CLI!", "description": "In this talk, we'll walk through a basic Apache Spark data pipeline which reads in an image dataset, processes it, and detects raccoons. That said, sponsored talks are always boring: let's see what we can do to spice things up using AI! We'll use Snowflake's Cortex Code CLI coding agent together to improve the talk live, taking suggestions from the audience as we go!\r\n\r\nAttendees to the talk can expect to learn the following:\r\n- What Apache Spark is, what it excels at, and how to set up a basic cluster\r\n- How to use HuggingFace ViT (vision transformer) to run a basic computer vision setup\r\n- A little bit about Snowflake's new coding agent, Cortex Code CLI (the part where we advertise at you, but I promise it will be fun)\r\n- Building a basic Streamlit app\r\n- .. and whatever other fun we get up to together!\r\n\r\nJoin for a session full of fun experimentation with interesting tools \u2013 and learn a bit about data pipelines too! This session is suitable for beginners to intermediates!", "recording_license": "", "do_not_record": false, "persons": [{"code": "KVW9PZ", "name": "Celeste Horgan", "avatar": "https://pretalx.com/media/avatars/KVW9PZ_xJNiKyO.webp", "biography": "Celeste Horgan is a Sr. OSS Developer Advocate and OSPO Lead at Snowflake. Previous roles include work at Aiven, The Linux Foundation, Stripe and commercetools. She has worked in open source since 2020, is a former contributor to the Kubernetes project, and currently immersed in the Postgres open source ecosystem. 
Her work has been featured in the New York Times and she regularly speaks internationally at technical conferences.", "public_name": "Celeste Horgan", "guid": "10bff601-53ec-5708-bde9-89c09049598a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/KVW9PZ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KBGXKC/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KBGXKC/", "attachments": []}, {"guid": "a2a10c5f-698d-5068-9ae5-f948401ad49c", "code": "UUHYUS", "id": 87951, "logo": null, "date": "2026-04-14T15:10:00+02:00", "start": "15:10", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87951-asyncio-vs-threads-who-survives-in-the-no-gil-era", "url": "https://pretalx.com/pyconde-pydata-2026/talk/UUHYUS/", "title": "AsyncIO vs Threads: who survives in the No-GIL Era?", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Talk", "language": "en", "abstract": "AsyncIO vs threads isn't about \"which is faster\" - it's about scheduling, memory, and the kind of load you run. We'll unpack what threads and asyncio do under the hood (OS scheduler vs event loop + epoll), run practical benchmarks, and show why many \"async\" libraries still rely on thread pools (aiofiles, Motor, Django bridges). Then we'll repeat the same tests on Python 3.14's free-threaded (no-GIL) build and discuss when an interpreter upgrade can beat an async rewrite.", "description": "Concurrency in Python is full of stereotypes: \"threads are useless because of the GIL\", \"async is always faster\", \"just make everything async\". This session replaces opinions with mechanics and measurements, and updates the story for Python 3.14's free-threaded (no-GIL) build.\r\n\r\nWhat we'll cover\r\n\r\n1) How things actually work under the hood\r\n- A Python thread is an OS thread (pthread_create/clone). 
The OS scheduler runs it like any other thread - the GIL only matters when Python bytecode executes.\r\n- asyncio is also scheduling: one OS thread, many Tasks, cooperative switching at await, and readiness notifications via epoll/select.\r\n\r\n2) Why IO-heavy workloads often look \"equally fast\" in threads and asyncio\r\n- both models hide IO latency by switching while waiting;\r\n- the real difference shows up in scalability and cost: per-thread memory/stack + OS limits vs lightweight Tasks.\r\n\r\n3) When \"async\" is secretly a thread pool\r\n- aiofiles delegates file operations to run_in_executor();\r\n- Motor (async MongoDB driver) runs the synchronous PyMongo core in a ThreadPoolExecutor;\r\n- frameworks like Django must bridge sync and async worlds (sync_to_async), adding overhead and sharp edges.\r\n\r\n4) Benchmarks that mirror real services\r\n- 100 / 1,000 / 10,000 concurrent IO waits: why \"10k threads\" fails but \"10k tasks\" is fine;\r\n- memory and CPU overhead comparison (what you pay for concurrency);\r\n- a microservice-style endpoint (FastAPI-like) in sync/threaded vs async mode.\r\n\r\n5) What changes with free-threading (no-GIL)\r\n- a high-level view of what CPython changes to make it possible;\r\n- rerunning the same benchmark with and without the GIL;\r\n- when an interpreter upgrade can deliver \"async-rewrite-level\" gains for mixed CPU+IO workloads.\r\n\r\nTakeaways\r\n- a practical checklist for choosing threading vs asyncio vs multiprocessing;\r\n- performance vs resource-usage intuition you can apply to real services;\r\n- guidance on how to read \"async\" claims in library docs.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NXW3BB", "name": "Igor Anokhin", "avatar": "https://pretalx.com/media/avatars/NXW3BB_38MzuLu.webp", "biography": "I have been working with Python for over eight years, although I started programming back in school.\r\n\r\nI began with small personal projects, then worked with several 
startups, gaining hands-on experience with real-world systems.\r\n\r\nSince 2021, I have been part of the K2 Cloud development team, focusing on building and scaling production Python services in AWS-like cloud platform.", "public_name": "Igor Anokhin", "guid": "699638c6-883d-5c6b-875e-988bc931879e", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NXW3BB/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/UUHYUS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/UUHYUS/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/UUHYUS/resources/AsyncI_fIYZjbh.pdf", "type": "related"}]}, {"guid": "5663a525-5b84-5368-92c1-762407677943", "code": "JPTTMK", "id": 92669, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-92669-the-art-of-the-optimal-a-pythonic-approach-to-complex-decision-making", "url": "https://pretalx.com/pyconde-pydata-2026/talk/JPTTMK/", "title": "The Art of the Optimal: A Pythonic Approach to Complex Decision-Making", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Sponsored Talk", "language": "en", "abstract": "As Python developers, we frequently tackle complex decision-making problems by writing custom scripts and heuristic algorithms. While a standard greedy algorithm might provide a quick, intuitive fix, it rarely finds the best possible solution\u2014often leaving significant efficiency, performance, and cost-savings on the table.\r\n\r\nIn this talk, we will explore the untapped power of mathematical optimization. We will start with a classic operations challenge. 
You will see firsthand how a standard rule-based Python heuristic compares to a mathematical optimization model, and how rigorously defining constraints and objectives can guarantee a globally optimal solution.\r\n\r\nBut optimization isn't just for traditional logistics! We will also bridge the gap to Machine Learning. We will demonstrate how optimization techniques can be utilized as a powerful verification step for ML models, such as calculating the minimum pixel changes required to trick a neural network into a misclassification.\r\n\r\nWhile we can only scratch the surface of these vast topics, you will walk away with a fresh perspective on problem-solving. Whether you are automating business operations or building robust ML pipelines, you will learn when to graduate from basic heuristics and start leveraging the \"art of the optimal\".", "description": "As Python developers, we frequently tackle complex decision-making problems by writing custom scripts and heuristic algorithms. While a standard greedy algorithm might provide a quick, intuitive fix, it rarely finds the best possible solution\u2014often leaving significant efficiency, performance, and cost-savings on the table.\r\n\r\nIn this talk, we will explore the untapped power of mathematical optimization. We will start with a classic operations challenge: the Paintshop Problem. You will see firsthand how a standard rule-based Python heuristic compares to a mathematical optimization model, and how rigorously defining constraints and objectives can guarantee a globally optimal solution.\r\n\r\nBut optimization isn't just for traditional logistics! We will also bridge the gap to Machine Learning. 
We will demonstrate how optimization techniques can be utilized as a powerful verification step for ML models, such as calculating the minimum pixel changes required to trick a neural network into a misclassification.\r\n\r\nWhile we can only scratch the surface of these vast topics, you will walk away with a fresh perspective on problem-solving. Whether you are automating business operations or building robust ML pipelines, you will learn when to graduate from basic heuristics and start leveraging the true \"art of the optimal.\"", "recording_license": "", "do_not_record": false, "persons": [{"code": "PUH8YU", "name": "Justine Broihan", "avatar": "https://pretalx.com/media/avatars/KFHVDV_dUit7pK.webp", "biography": "Justine is a Senior Projects and Consulting Specialist at GAMS Software GmbH, where she bridges the gap between complex mathematics and practical software solutions. With a PhD in Operations Research and six years of experience in academic research and teaching, she now focuses on the end-to-end delivery of real-world optimization projects. For the past three years, Justine has been helping clients design, build, and deploy robust decision-making systems. 
She is passionate about showing developers how to move beyond basic heuristics and leverage true mathematical optimization to solve their most complex challenges.", "public_name": "Justine Broihan", "guid": "7d4bba1b-6f7d-573f-b5e9-2cf1389eb605", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/PUH8YU/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/JPTTMK/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/JPTTMK/", "attachments": [{"title": "Slides - The Art of the Optimal", "url": "/media/pyconde-pydata-2026/submissions/JPTTMK/resources/The_Ar_z4Vmhk3.pdf", "type": "related"}]}, {"guid": "9bfc0d52-eec9-5a71-add6-52710a42177b", "code": "8YGQZC", "id": 95191, "logo": null, "date": "2026-04-14T17:10:00+02:00", "start": "17:10", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-95191-type-errors-for-better-agent-assisted-development", "url": "https://pretalx.com/pyconde-pydata-2026/talk/8YGQZC/", "title": "Type Errors for Better Agent-Assisted Development", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Sponsored Talk", "language": "en", "abstract": "Type annotations aren't just for humans anymore. As AI coding agents write more Python, type checkers offer something unique: fast, concrete diagnostics about what went wrong and where. In this talk, I explore connecting Pyrefly to Claude Code, feeding type errors back to the agent as it works, and whether this is the missing feedback signal for agentic development.", "description": "As AI coding agents take on larger Python tasks, a practical question emerges: what's the best way to catch the bugs they introduce? Tests are thorough but slow. Linting is fast but shallow. 
Type checking occupies an interesting middle ground: deep enough to catch semantic errors, fast enough to run on every edit, and concrete enough to tell the agent exactly what to fix.\r\n\r\nIn this talk, I explore connecting Pyrefly, a Python type checker built at Meta, to Claude Code. I'll walk through integration options and discuss practical considerations like token costs and setup complexity. Whether you're building tools for AI agents or using them in your daily work, you'll leave with a clearer picture of where type checking fits in the agentic development loop.", "recording_license": "", "do_not_record": false, "persons": [{"code": "ZDYGBP", "name": "Kyle Into", "avatar": "https://pretalx.com/media/avatars/9SJD7L_k1gnGJG.webp", "biography": "Kyle is a Software Developer at Meta focused on developer tooling and static analysis. For the past four years he has worked to improve Python language services. Kyle is passionate about building tools that make developers' lives easier, especially in dynamic languages like Python.", "public_name": "Kyle Into", "guid": "975f9999-b40a-522f-a85a-86a42965bdb3", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ZDYGBP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/8YGQZC/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/8YGQZC/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/8YGQZC/resources/PyCon__IVjJHRx.pdf", "type": "related"}]}], "Europium [3rd Floor]": [{"guid": "50a8e4e4-67b7-5789-b1a6-5ad8ed05556a", "code": "AWFFUS", "id": 93818, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-93818-kickstart-coding-at-scale-how-project-template-automation-unlocks-developer-productivity", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AWFFUS/", "title": "Kickstart Coding at Scale: How Project Template Automation Unlocks 
Developer Productivity", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Sponsored Talk", "language": "en", "abstract": "As your company grows, so does your software landscape. Different CI configurations, inconsistent linting rules, varying packaging approaches: every new project reinvents the wheel. Scaffolding tools like cookiecutter help with the initial setup \u2014 but what happens six months later, when best practices have evolved, and your template has moved on? That\u2019s where most approaches fall apart. And a centralized, \u201cmagic\u201d pipeline is no better \u2014 it\u2019s opaque, brittle, and leaves no room for customization. Using Copier, we built a standardized yet customizable project template \u2014 a paved road that guides developers without boxing them in. But the real game changer is what comes after: Copier\u2019s built-in update mechanism lets us propagate template improvements to hundreds of existing projects. A GitHub bot runs monthly, opens Pull Requests with the latest changes, and a Streamlit dashboard tracks adoption across the organization. Attendees will learn how to build flexible templates, automate ongoing maintenance at scale, and manage version drift \u2014 so developers can focus on writing code instead of fighting boilerplate.", "description": "**The Problem**\r\nAs organizations scale, their repository count grows \u2014 and with it, the diversity of project setups. Different CI configurations, inconsistent linting rules, varying packaging approaches: every new project reinvents the wheel. Developers spend valuable time on boilerplate instead of writing code. Another approach \u2014 a centralized, \"magic\" build pipeline \u2014 trades one problem for another: it's opaque, brittle, and leaves no room for project-specific needs. 
We illustrate this with a concrete example: pre-commit configuration.\r\n\r\n**A Paved Road with Copier**\r\nThe Python tool Copier goes beyond one-time scaffolding \u2014 it\u2019s a lifecycle management tool. When the template evolves, copier update merges improvements into existing projects, respecting local customizations. This is what sets it apart from cookiecutter and similar tools. We built an internal project template that generates CI workflows, pre-commit configuration, conda packaging, documentation scaffolding, and more \u2014 all customizable through simple yes/no questions during setup. Crucially, projects can deviate from the template whenever needed, without breaking the update mechanism. This section includes a live demo.\r\n\r\n**Automated Migration at Scale**\r\nA template is only useful if projects stay up to date. We built a GitHub bot that runs monthly across all repositories in our organization, executes copier update, and opens Pull Requests with the changes. Merge conflicts are minimized by encouraging teams not to diverge too far from the template. For the conflicts that do arise, mergiraf helps with resolution \u2014 but maintainers may still need to step in.\r\n\r\n**Tracking Progress with a Dashboard**\r\nTo answer \"how many projects are up-to-date?\", we built a Streamlit dashboard that shows the template version for each repository, with search filters and charts. 
This gives the team visibility into adoption progress and helps identify repositories that are falling behind.\r\n\r\n**Lessons Learned**\r\nWe share practical lessons from rolling this out across a large organization \u2014 what worked, what's still challenging, and where we see current limitations.\r\n\r\n**Takeaways**\r\nAttendees will learn how to:\r\n- Use Copier to create and continuously update project templates that standardize without locking developers in.\r\n- Automate template updates across repositories via a GitHub bot and automated Pull Requests.\r\n- Use a dashboard to track which projects are up-to-date and which are lagging.\r\n- Reduce \"boilerplate fatigue\" so teams can focus on shipping code.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EHUYXK", "name": "Yannik Tausch", "avatar": "https://pretalx.com/media/avatars/T3Z3TY_akF1ZZb.webp", "biography": "Yannik is a software engineer at QuantCo, working on client-facing projects and on internal developer tooling and infrastructure. 
He studied Computer Science at KIT and contributes to the conda-forge ecosystem.", "public_name": "Yannik Tausch", "guid": "c00cd3ae-a1b9-5c09-a5aa-2e75197b0fb6", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EHUYXK/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AWFFUS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AWFFUS/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/AWFFUS/resources/2026-0_5OKaQ8N.pdf", "type": "related"}]}, {"guid": "73172b43-66f0-5a89-b5e2-6ccc11f3e820", "code": "NF7MKB", "id": 86106, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:45", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-86106-programming-quantum-networks-in-python", "url": "https://pretalx.com/pyconde-pydata-2026/talk/NF7MKB/", "title": "Programming Quantum Networks in Python", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Invited Talk", "language": "en", "abstract": "Quantum networks connect quantum devices including quantum computers, enabling applications not realizable in classical networks, such as secure quantum computing in the cloud and quantum key distribution. These networks are now moving from theory to reality, and as part of the Quantum Internet Alliance, we are actively building a prototype quantum network in Europe, driven by applications developed in Python.  \r\n\r\nIn this talk, we will introduce quantum networking and demonstrate how to program quantum network applications in Python by walking through the quantum teleportation protocol. We'll conclude by sharing resources so that you can begin experimenting with quantum network programming yourself. 
No prior quantum experience required.", "description": "Quantum networks connect quantum devices including quantum computers, enabling applications not possible in classical networks, such as secure quantum computing in the cloud and quantum key distribution. These networks are now moving from theory to reality, and as part of the Quantum Internet Alliance, we are actively building a prototype quantum network in Europe, driven by applications developed in Python.  \r\n\r\nEven though quantum systems are governed by the rules of quantum mechanics, you don't need to be an expert in quantum physics to start programming them!  \r\n\r\nDeveloping applications for quantum networks reveals new challenges. For example, unlike in classical networks where data is copied and retransmitted, quantum information cannot be copied. Once lost, it is irretrievable. This motivates a new networking primitive for transferring data, the quantum teleportation protocol.  \r\n\r\nIn this talk, we will walk through the quantum teleportation protocol step-by-step using the NetQASM SDK and the SquidASM simulator, Python tools developed by our research group for quantum network programming and simulation. We'll conclude by sharing resources so that you can begin experimenting with quantum network programming yourself. No prior quantum experience required.", "recording_license": "", "do_not_record": false, "persons": [{"code": "WWGHWA", "name": "Samuel Oslovich", "avatar": "https://pretalx.com/media/avatars/WWGHWA_92B2qlh.webp", "biography": "Samuel Oslovich is a PhD candidate in the group of Stephanie Wehner at QuTech, Delft University of Technology, the Netherlands. His research focuses on benchmarking, scheduling, and improving the performance of near-term quantum networks, using Python-based simulation tools such as NetQASM, SquidASM, and Qoala-Sim. 
He holds a Master's in Computer Science and a Bachelor's in Computer Science and Engineering from the University of Connecticut, USA.", "public_name": "Samuel Oslovich", "guid": "8645278b-5d87-5ed0-a77e-9fd86ece0cbc", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/WWGHWA/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/NF7MKB/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/NF7MKB/", "attachments": []}, {"guid": "e3d73d5d-093a-51bb-95be-3f7a19a698d7", "code": "333HDN", "id": 94522, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-94522-from-hard-problems-to-proven-solutions-solving-decision-problems-with-gurobi", "url": "https://pretalx.com/pyconde-pydata-2026/talk/333HDN/", "title": "From Hard Problems to Proven Solutions: Solving Decision Problems with Gurobi", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Sponsored Talk", "language": "en", "abstract": "Join us as we demonstrate how to formulate and solve hard decision problems with Gurobi. You\u2019ll learn practical modeling techniques that integrate naturally with NumPy, SciPy.sparse, and pandas. We\u2019ll show how mathematical optimization computes reliable solutions with provable guarantees \u2014 enabling robust, transparent decision-making.", "description": "Many real-world applications require making the best possible decisions under complex constraints \u2014 whether in scheduling, resource allocation, routing, or planning. These problems quickly become difficult as the number of interacting choices grows.\r\n\r\nThis session introduces mathematical optimization as a practical tool for solving such problems. 
Using Gurobi, we demonstrate how to formulate decision problems and compute solutions that satisfy all constraints and come with clear guarantees about their quality.\r\n\r\nYou\u2019ll see how to express optimization models using familiar data structures such as NumPy arrays, SciPy.sparse matrices, and pandas DataFrames.\r\n\r\nBy the end of the session, you\u2019ll have an understanding of how to approach modeling and solving complex decision problems \u2014 and how optimization can be used to support reliable, data-driven decisions.", "recording_license": "", "do_not_record": false, "persons": [{"code": "WHE7N9", "name": "Silke Horn", "avatar": "https://pretalx.com/media/avatars/QJDXKB_Me8fsIF.webp", "biography": "Dr. Silke Horn is a Mathematical Optimization QA Engineer with the Gurobi Optimizer team. She began her journey at Gurobi in 2018 in the technical support team and transitioned to R&D in 2024. She holds a Ph.D. in Mathematics from TU Darmstadt (Germany) and has many years of experience in academic teaching and software development.", "public_name": "Silke Horn", "guid": "ffe42e3e-13d3-566e-bef2-8b40c472d1ab", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/WHE7N9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/333HDN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/333HDN/", "attachments": []}, {"guid": "34420a62-2918-5c4f-b675-47c3d1a0011a", "code": "QVLTKD", "id": 93060, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-93060-python-in-climate-tech-vehicle-to-grid", "url": "https://pretalx.com/pyconde-pydata-2026/talk/QVLTKD/", "title": "Python in Climate Tech: Vehicle-to-Grid", "subtitle": "", "track": "General: Others", "type": "Sponsored Talk", "language": "en", "abstract": "This talk dives into how Python helps us to bridge the gap between automotive and energy industries. 
Learn how Python helps in enabling Vehicle-to-Grid and therefore the bi-directional integration of EV batteries into the power grid, enabling further use and growth of renewable energies, stabilizing power grids and enhancing the accessibility of electric mobility.", "description": "At The Mobility House Energy, our mission is to enable a zero-emission future by connecting the worlds of mobility and energy. By intelligently integrating electric vehicle batteries into the power grid, we unlock flexibility that supports renewable energy expansion, enhances grid stability, and makes electric mobility more accessible and affordable.\r\n\r\nIn this talk, we share how Python became a key enabler on our journey to delivering Vehicle-to-Grid solutions at scale. From early simulations and prototyping to operating production-grade energy systems, Python supports us across the entire development lifecycle. It allows us to rapidly validate ideas, process and analyze complex energy and mobility data, and deploy robust services that are battle-tested in the real-world and on energy markets.\r\n\r\nWe will also explore how adopting Python in production reshaped our collaboration model. Data scientists and software engineers now work closer together, sharing tools, codebases, and responsibilities. At the same time, we will openly discuss the technical and organizational challenges we encountered\u2014from performance bottlenecks to system integration\u2014and the practical solutions that helped us overcome them.", "recording_license": "", "do_not_record": false, "persons": [{"code": "JPCWXQ", "name": "Christopher Sedlaczek-Bock", "avatar": null, "biography": "After finishing my PhD in high energy physics, I worked as software developer and as solution architect on projects in various industries. In the end I ended up at The Mobility House Energy, because I want to work towards a zero-zero future. 
Nowadays I am working as the Head of Tech VGI.", "public_name": "Christopher Sedlaczek-Bock", "guid": "87c88f91-8f0e-5414-a798-777d817618f2", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/JPCWXQ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/QVLTKD/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/QVLTKD/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/QVLTKD/resources/PyConD_KGyDIbL.pdf", "type": "related"}]}], "Palladium [2nd Floor]": [{"guid": "b48a7ee5-c3ba-5aae-b6ed-3e73fec8a761", "code": "GVHZW9", "id": 85973, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-85973-solving-marketplace-cold-start-at-scale-with-ranking", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GVHZW9/", "title": "Solving Marketplace Cold Start at Scale with Ranking", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "Cold start is a critical bottleneck for marketplaces: new items lack behavioral signals and reviews, so ranking models under-expose them, delaying the very signals needed to rank them well. This talk shares practical solutions developed at scale for a travel marketplace, including guaranteed exposure at key positions, efficient real-time re-ranking, and targeted boosting for unactivated items. Attendees will learn how experiment-driven iteration shaped a robust system that accelerates early traction for new items without sacrificing overall marketplace health.", "description": "Cold start cripples two\u2011sided marketplaces: new items lack behavioral signals and social proof, ranking models under\u2011expose them, which delays the very signals needed to rank them well. This talk shares our journey to break down this loop at GetYourGuide, a marketplace for travel experiences. 
We evolved our exploration/activation framework over the past three years with three complementary interventions: guaranteed exposure at strategic positions, a real\u2011time reranker to allocate that exposure efficiently under tight latency budgets, and guardrail boosting for unactivated items when primary assessment slots are empty. \r\n\r\nThe talk is a pragmatic case study: we\u2019ll show how experiment\u2011led exploration shaped the system over the last 3 years. We will share what worked, what did not, and how we managed trade-offs between short-term revenue and long-term marketplace health. Attendees will leave with a blueprint for safely accelerating early traction in their own marketplaces, combining learning\u2011to\u2011rank with exposure guarantees without sacrificing overall business health.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EZMJWT", "name": "Theodore Meynard", "avatar": "https://pretalx.com/media/avatars/EZMJWT_io5jCH5.webp", "biography": "Theodore Meynard is a data science manager at GetYourGuide. He leads the evolution of their ranking algorithm, helping customers to find the best activities to book and locations to explore. Beyond work, he is one of the co-organizers of the Pydata Berlin meetup and the conference. 
When he is not programming, he loves riding his bike and looking for the best bakery-patisserie in town.", "public_name": "Theodore Meynard", "guid": "86973d97-e18a-5002-9b99-7690509f6220", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EZMJWT/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GVHZW9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GVHZW9/", "attachments": [{"title": "slides", "url": "/media/pyconde-pydata-2026/submissions/GVHZW9/resources/202604_yIEXBTs.pdf", "type": "related"}]}, {"guid": "c04744ad-4aa7-5fd8-8769-fabaf10826f4", "code": "DVCKHF", "id": 87289, "logo": null, "date": "2026-04-14T12:25:00+02:00", "start": "12:25", "duration": "00:45", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87289-personalized-restaurant-recommendations-at-scale-combining-transformer-with-gradient-boosted-ranking", "url": "https://pretalx.com/pyconde-pydata-2026/talk/DVCKHF/", "title": "Personalized Restaurant Recommendations at Scale combining Transformer with Gradient-Boosted Ranking", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk (long)", "language": "en", "abstract": "Wolt\u2019s Universal Venue Ranker (UVR) is a large-scale, sequence-aware ranking model for personalized restaurant recommendations, deployed across more than 30 countries. UVR replaces three previously independent models\u2014Neural Collaborative Filtering, a second-pass ranker, and a first-time-user model\u2014by combining a transformer with a gradient-boosted decision tree for ranking.\r\n\r\nThe model follows a two-stage design. In the first stage, an encoder-style transformer learns a personalized user state representation from historical restaurant purchase sequences enriched with spatiotemporal signals such as time and location. 
In the second stage, a CatBoostRanker uses the transformer output as an input feature alongside additional user-, venue-, user\u2013venue-, and delivery-specific features to score and rank candidate venues.\r\n\r\nIn this talk, we present the model and service architecture, the training and evaluation setup, and both offline and online results from a multi-country online A/B test, demonstrating significant improvements in global conversion rate and new venue trial rate. We also share practical lessons from deploying and operating a multi-stage ranking model under strict latency constraints at global scale.", "description": "Personalized restaurant ranking is a core machine learning problem in food delivery platforms, requiring models to balance relevance, exploration, latency, and robustness across highly heterogeneous markets. In this talk, we present UVR (Universal Venue Ranker), Wolt\u2019s production ranking model for restaurant recommendations, currently deployed in more than 30 countries.\r\n\r\nUVR unifies the capabilities of three previously separate models\u2014Neural Collaborative Filtering (NCF), a second-pass ranker, and a first-time-user (FTU) model\u2014into a single, sequence-aware ranking approach. Beyond improving recommendation quality, this consolidation significantly reduced model complexity, operational overhead, and long-term maintenance cost.\r\n\r\nThe model follows a two-stage architecture implemented using widely adopted Python-based machine learning technologies, including PyTorch, CatBoost, and Flyte. The first stage is an encoder-style transformer trained with a classification loss on a next-purchase prediction task. It learns a compact user state representation from historical restaurant purchase sequences enriched with spatiotemporal information, such as purchase time and user location. 
This stage outputs a personalized venue relevance score.\r\n\r\nThe second stage is a CatBoostRanker, trained with a learning-to-rank loss on grouped venue requests. It combines the transformer-derived score with a rich set of additional features, including user-specific attributes, venue metadata, user\u2013venue interaction features, and delivery-related signals. This separation of objectives\u2014classification for representation learning and ranking for final scoring\u2014proved critical for both model performance and training stability.\r\n\r\nWe will walk through the end-to-end training and evaluation pipeline, covering feature construction, offline validation using ranking metrics, and a multi-country online A/B testing setup. UVR delivered significant and substantial improvements in global conversion rate and new venue trial rate, a key driver of long-term user retention. We will discuss how offline improvements translated into online gains.\r\n\r\nA dedicated section of the talk focuses on production and serving architecture, including low-latency inference and orchestration of training and deployment workflows using Flyte. We also share hard-earned lessons from training a multi-stage ranking model, such as preventing data leakage between models trained with different objectives and on different data as well as handling cold-start.\r\n\r\nFinally, we outline our roadmap toward extending UVR into a cross-domain ranking model for both restaurants and stores, enabling knowledge transfer across domains while preserving strong personalization guarantees.", "recording_license": "", "do_not_record": false, "persons": [{"code": "AA99AD", "name": "Marcel Kurovski", "avatar": null, "biography": "Senior Applied Scientist in Wolt's Personalization Team working on Venue and Item Ranking and Recommendation. Show Host of Recsperts - Recommender Systems Experts, the Podcast Show with industry and academia experts in Recommender Systems. 
Building Recommenders and Personalization Solutions with Python for various industries since 9+ years as well as creator and instructor of Python RecSys Training.", "public_name": "Marcel Kurovski", "guid": "731fb5cf-896e-5075-8a56-f51049ed27fc", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/AA99AD/"}, {"code": "8BCJ9W", "name": "Steffen Klempau", "avatar": "https://pretalx.com/media/avatars/8BCJ9W_oYOabWW.webp", "biography": null, "public_name": "Steffen Klempau", "guid": "15236e22-134a-5bd9-886e-901ae6a39345", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8BCJ9W/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/DVCKHF/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/DVCKHF/", "attachments": [{"title": "20260414_PyCon2026_RecSys_UVR_Talk", "url": "/media/pyconde-pydata-2026/submissions/DVCKHF/resources/202604_TglHwqo.pdf", "type": "related"}]}, {"guid": "a93ea8cd-a5aa-56f8-beff-ea19b6d894d5", "code": "LYCBNT", "id": 87649, "logo": null, "date": "2026-04-14T15:10:00+02:00", "start": "15:10", "duration": "00:45", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87649-what-breaks-when-automatic-speech-recognition-systems-go-multilingual", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LYCBNT/", "title": "What Breaks When Automatic Speech Recognition Systems Go Multilingual", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk (long)", "language": "en", "abstract": "Building machine learning models for audio deepfake detection seems straightforward until datasets span multiple languages, such as Hindi, Korean, Mandarin, and German. 
In practice, multilingual Automatic Speech Recognition (ASR) systems often fail in production because language-specific acoustic variations and assumptions about the processing pipeline break down at scale.\r\n\r\nThis talk examines the engineering challenges of building a multilingual deepfake detection system using a Python-centric pipeline. It covers practical issues encountered during large-scale audio preprocessing, including memory-efficient data loading, resumable feature-extraction workflows, and validation strategies designed to prevent cross-lingual leakage. The session also shares lessons from deploying a multilingual ASR-based system, with a focus on pipeline structure, evaluation correctness, and operational robustness in real-world settings.", "description": "In a multilingual Automatic Speech Recognition (ASR) dataset containing over 440,000 audio samples, preprocessing methods that were effective for one language often failed silently for others. This resulted in shifts in acoustic features, misleading validation outcomes, and prolonged jobs that failed due to assumptions that held true only in monolingual contexts. This presentation examines the issues that arise when extending ASR systems to multilingual data, using a real-world deepfake detection system that includes Hindi, Korean, Mandarin, and German. It addresses the engineering challenges encountered while developing and operating a Python-based pipeline at scale.\r\n\r\nThe session will discuss practical issues in large-scale audio processing, including the creation of memory-efficient data loaders, the design of workflows that support resumable preprocessing and feature extraction, and strategies for managing long-running jobs to avoid redundant computations. 
Additionally, it will cover validation strategies for multilingual ASR systems, emphasizing that language imbalance and shared pipelines can lead to cross-lingual leakage, which skews evaluation results if not explicitly addressed.\r\n\r\nKey takeaways include:\r\n1. Multilingual ASR pipelines reveal language-specific issues that are not present in monolingual systems.\r\n2. Scalable audio processing requires memory-efficient and resumable Python workflows.\r\n3. Cross-lingual evaluation necessitates explicit control over language imbalance and leakage.", "recording_license": "", "do_not_record": false, "persons": [{"code": "8RV9AV", "name": "Rashmi Nagpal", "avatar": "https://pretalx.com/media/avatars/8RV9AV_wNlmFun.webp", "biography": "Rashmi is an AI Research Scientist at Poseidon and a researcher at MIT CSAIL, working in the intersection of cybersecurity and artificial intelligence. She has six years of industrial experience, having brought ideas to life at pre-seed startups and contributed to impactful redesigns and features at established industry giants. 
Beyond coding, Rashmi finds inspiration in capturing the wonders of the cosmos through her telescope and engaging in board games with friends.", "public_name": "Rashmi Nagpal", "guid": "2d9e07b8-d883-5493-8485-0f2ede026518", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8RV9AV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/LYCBNT/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/LYCBNT/", "attachments": []}, {"guid": "bbf3b5f2-5ebf-55eb-9b3f-cce579ee938e", "code": "99UMEL", "id": 87046, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87046-when-space-weather-breaks-your-gps-building-an-explainable-early-warning-system", "url": "https://pretalx.com/pyconde-pydata-2026/talk/99UMEL/", "title": "When Space Weather Breaks Your GPS: Building an Explainable Early Warning System", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "Have you ever happened to use GPS and realised that it is not working properly? The Sun could be responsible.\r\n\r\nIn this talk, I present a **real-world machine learning forecasting system** designed to predict a Space Weather phenomenon affecting GNSS accuracy and radio communications. The system is based on **CatBoost** and integrates data from space- and ground-based observations. **SHAP** is used to debug model behaviour and to build trust in model outputs. 
The talk focuses on **model design and evaluation choices**, showing how interpretability and uncertainty-aware forecasting can be combined in a real-time operational pipeline.", "description": "**Space Weather** doesn\u2019t just produce beautiful auroras: it can silently disrupt navigation systems, radio links, and satellite-based technologies we rely on every day.\r\n\r\nTravelling Ionospheric Disturbances (TIDs) are wave-like structures in the ionosphere that affect GNSS accuracy and HF communications. From an ML perspective, forecasting TIDs is a challenging rare-event prediction problem involving imbalanced data and heterogeneous physical inputs.\r\n\r\nIn this talk, I will present an operational machine learning approach developed within the T-FORS project to forecast TID occurrence over Europe. The model is built using **CatBoost** and integrates data from space- and ground-based observations.\r\n\r\nThe talk focuses on **model design and evaluation choices**. In particular, I will show how **SHAP** can be used to debug model behaviour, validate feature relevance, and build trust in predictions in a high-risk operational context.\r\n\r\nAlong the way, I\u2019ll share practical engineering lessons on:\r\n- handling class imbalance,\r\n- incorporating domain knowledge into ML pipelines,\r\n- producing **uncertainty-aware outputs** via **Conformal Prediction**, and\r\n- running **interpretable models in real-time forecasting systems**.\r\n\r\nThe talk is aimed at data scientists and ML practitioners interested in applied forecasting, interpretable models, uncertainty quantification and ML at the boundary between data and physics.\r\n\r\n---\r\n\r\n**Talk outline**\r\n- 0-4: What is Space Weather and why should we care\r\n- 4-7: Framing TID forecasting as an ML problem\r\n- 7-10: Model design with CatBoost\r\n- 10-13: Explainability with SHAP\r\n- 13-18: Uncertainty quantification with Conformal Prediction\r\n- 18-22: Cost-sensitive learning and real-time 
operations\r\n- 22-25: Lessons learned\r\n- 25-30: Q&A", "recording_license": "", "do_not_record": false, "persons": [{"code": "FRRAE7", "name": "Vincenzo Ventriglia", "avatar": "https://pretalx.com/media/avatars/FRRAE7_bJYDfI9.webp", "biography": "A results-driven data professional, focused on hype-free solutions tailored to business needs.\r\n\r\nI currently create value at the **National Institute of Geophysics and Volcanology**, where I develop machine learning models in the **Space Weather** domain. My work is complemented by finding the hidden stories in data and making them accessible to stakeholders. I studied Physics in Italy (Napoli) and Germany (Frankfurt am Main), previously worked in Analytics within the strategic division of the world's largest professional services network, as well as in the Data Science department of Italy\u2019s leading publishing group.\r\n\r\nI am also an organiser of **PyData Roma Capitale**, actively involved in building the local Python and data science community. 
Outside of work, I enjoy theatre, discussing finance, and learning new languages.", "public_name": "Vincenzo Ventriglia", "guid": "524ac8af-2da0-540e-946f-1676f3146ee6", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FRRAE7/"}], "links": [{"title": "Personal Website", "url": "https://viventriglia.github.io/portfolio/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/99UMEL/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/99UMEL/", "attachments": []}, {"guid": "a27c351c-572a-51d1-8543-5b1d84e29adf", "code": "3UHPZB", "id": 87696, "logo": null, "date": "2026-04-14T17:10:00+02:00", "start": "17:10", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87696-it-works-on-my-machine-why-llm-apps-fail-users-not-tests", "url": "https://pretalx.com/pyconde-pydata-2026/talk/3UHPZB/", "title": "It Works on My Machine: Why LLM Apps Fail Users (Not Tests)", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "LLM applications frequently pass tests but fail users in production. This talk examines the gap between evaluation metrics and user experience through three lenses: **Expectations** (what \"working\" means to users), **Functional** (system-level vs. component-level success), and **Operational** (real-world reliability).\r\n\r\nDrawing from production experience, we'll share scenarios of expectation mismatches, silent failures, and undetected drift\u2014plus practical strategies for bridging the gap. The core message: evaluation should answer whether your system serves users, not whether it passes tests.", "description": "You've deployed an LLM application. Your tests show that it's working. The metrics look good. 
Then a user says **it's broken.**\r\n\r\nThis happens more often than you would expect.\r\n\r\nIn this talk, we'll share our experience of building and maintaining LLM applications, and discuss what we've learned about the discrepancy between evaluation results and user experience.\r\n\r\nWe will explore three dimensions of evaluation through the lens of user experience:\r\n\r\n## Expectations: What does 'working' actually mean to your users?\r\n\r\nSometimes the gap between tests and reality comes down to expectations. Questions that seem obviously hard to users turn out to be easy for the LLM\u2014and vice versa. Understanding this mismatch is the first step to building systems that users actually trust.\r\n\r\n## Functional: Does the system do what it's supposed to do?\r\n\r\nWhen you're working with LLMs, individual components might pass tests while the whole system fails. With prompts, model parameters, evaluation criteria, metadata, and ever-growing datasets all interacting, the complexity compounds quickly.\r\n\r\n## Operational: Does it remain reliable in real-world conditions?\r\n\r\nIn this section, we'll share practical lessons from operating LLM applications in production: how we use observability tools like Opik to monitor model behavior, how telemetry helps us understand actual usage patterns, and how dedicated validation endpoints allow us to detect issues in on-premises deployments before users do.\r\n\r\nWe'll discuss real-life scenarios we've encountered, such as when users expected different results to those delivered by our system, when external changes affected the system silently, and when performance drifted in ways that our metrics didn't detect.\r\n\r\nThis isn't a talk about frameworks or tools (even though we'll mention a few). 
It's about the human element of evaluation: **ensuring that the system we built serves the people using it.**\r\n\r\nWhether you're just starting out with LLM applications or running them at scale, you'll probably recognize these scenarios. We'll share the strategies and patterns that we've developed, not as prescriptive rules, but as a starting point for your own approach.\r\n\r\n## Outline\r\n\r\n1. Why users report the LLM application is broken while it passes every test\r\n2. Three dimensions of the problem\r\n    * Expectations\r\n    * Functional\r\n    * Operational\r\n3. Real-life scenarios\r\n4. Our current strategies and patterns\r\n5. Evaluation = understanding if the system serves users, not proving it's good", "recording_license": "", "do_not_record": false, "persons": [{"code": "UZ9VMC", "name": "Thomas Prexl", "avatar": "https://pretalx.com/media/avatars/UZ9VMC_2Yt8kzl.webp", "biography": "Thomas builds LLM applications that create business impact. He co-founded neunzehn innovations GmbH to bring generative AI into companies that need it.\r\n\r\nBefore that, he ran startup support in Heidelberg\u2014designing accelerators, connecting founders with money and know-how, and launching events like Neurons & Neckar, Sensors & Data Hackathon, and Startup Weekend Rhein-Neckar. Earlier: marketing and business development in electrical engineering and diagnostics.\r\n\r\nHe studied at Mannheim, got his doctorate at Basel, teaches at both Heidelberg and Mannheim, and talks about AI when someone asks him to.", "public_name": "Thomas Prexl", "guid": "81bc4e70-31b4-534d-a4fa-a18fae5359fb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UZ9VMC/"}, {"code": "SGPHNQ", "name": "Frank Rust", "avatar": "https://pretalx.com/media/avatars/SGPHNQ_8KDUHv7.webp", "biography": "Frank is deeply passionate about technological advancements and a co-founder of neunzehn innovations, a company specializing in AI solutions. 
His professional background combines entrepreneurial experience\u2014having established an innovation and strategy consultancy focused on strategy and deep tech\u2014with several years at a major software corporation. Throughout his tenure in the software industry, he contributed to multiple product and service launches, working across various teams to bring new offerings to market. Outside the office, he enjoys discovering new horizons in the camper van.", "public_name": "Frank Rust", "guid": "6b355da9-4d17-53d7-af9c-3cad78a46200", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/SGPHNQ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3UHPZB/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3UHPZB/", "attachments": []}], "Ferrum [2nd Floor]": [{"guid": "17a96488-cb41-5842-9acf-9af6e68d4632", "code": "RRLTBU", "id": 91149, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "01:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-91149-from-prompt-to-production-how-to-use-ai-code-assistants-for-python-data-systems", "url": "https://pretalx.com/pyconde-pydata-2026/talk/RRLTBU/", "title": "From Prompt to Production: How to use AI Code Assistants for Python Data Systems", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Tutorial", "language": "en", "abstract": "**Code-generating LLMs have matured** to the point where they can reliably scaffold **data pipelines and data agents**, when used in a **supervised, engineering-first workflow**. This tutorial demonstrates how to combine modern **AI coding assistants** with a **production-ready Python deployment platform (Tower.dev)** to build and operate **real data systems**.\r\n\r\nParticipants will learn how to structure **collaborative Human/AI Assistant development loops**, where engineers provide **architecture, domain knowledge, and review**, while AI accelerates implementation. 
We will build a **data pipeline** and a **lightweight data agent**, iterating with an AI assistant to **generate, test, and improve code**.  \r\n\r\nThe session also covers critical **operational concerns** such as:\r\n- **Security**\r\n- **Scaling**\r\n- **Observability**\r\n- **Debugging**\r\n\r\nYou will also see how **production feedback can be looped back into the assistant** to continuously improve generated code.\r\n\r\nThis is **not about \u201cvibe coding\u201d** a website. It is about **disciplined, review-driven AI collaboration** that meaningfully improves productivity for **data practitioners at all levels**.", "description": "This **90-minute hands-on tutorial** shows how to **design, build, and deploy Python data pipelines and data agents** using AI coding assistants in a **supervised engineering workflow**.\r\n\r\n### Outline\r\n\r\n- **The state of AI code generation** for data engineering  \r\n- Designing **collaborative Human/LLM development loops**  \r\n- Building a **data pipeline with structured AI assistance**  \r\n- Creating a **simple data agent**  \r\n- Deploying and operating **Python workloads in production** using **Tower.dev** \r\n- Using **logs, observability, and runtime feedback** to guide AI-driven refactoring  \r\n- **Best practices, risks, and guardrails**\r\n\r\nParticipants will leave with **practical patterns for integrating AI into real-world data engineering workflows**, from **prototype to production**.", "recording_license": "", "do_not_record": false, "persons": [{"code": "ZUECPC", "name": "Serhii Sokolenko", "avatar": "https://pretalx.com/media/avatars/ZUECPC_sFofzBE.webp", "biography": "Serhii Sokolenko is a co-founder of Tower, a Pythonic platform for data flows and agents running on top of open analytical storage. 
Prior to founding Tower, Serhii worked at Databricks, Snowflake and Google on data processing and databases.", "public_name": "Serhii Sokolenko", "guid": "c94dd5e4-64fe-536d-bfa5-f451831b6f65", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ZUECPC/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/RRLTBU/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/RRLTBU/", "attachments": []}, {"guid": "1587e174-3e6a-5659-91a1-16f1c4769e47", "code": "KKCYJN", "id": 88352, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "01:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-88352-your-first-open-source-contribution-in-python-from-fork-to-pull-request", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KKCYJN/", "title": "Your First Open Source Contribution in Python: From Fork to Pull Request", "subtitle": "", "track": "General: Education, Career & Life", "type": "Tutorial", "language": "en", "abstract": "Contributing to open source can feel intimidating, even for experienced Python developers. In this hands-on tutorial, participants will make their first real open source contribution to a Python project, learning the complete workflow from fork to pull request.\r\n\r\nUsing a real-world Python library, attendees will practice reading an unfamiliar codebase, making a small but meaningful change, running tests, and opening a pull request following community standards. The focus is on practical skills, tooling, and confidence \u2014 not theory.\r\n\r\nBy the end of the session, participants will understand how to start contributing to Python open source projects and feel prepared to continue contributing beyond the workshop.", "description": "Open source is a core pillar of the Python ecosystem, yet many developers struggle to make their first contribution. 
The barriers are often not technical ability, but uncertainty around workflows, expectations, and collaboration practices.\r\n\r\nThis 90-minute hands-on tutorial guides participants through their first real contribution to an open source Python project, focusing on clarity, safety, and reproducibility. Rather than working on toy examples, attendees will contribute to ScanAPI, an actively maintained open source Python library used for automated API integration testing and live documentation.\r\n\r\nThe tutorial is designed to demystify the contribution process while remaining technically grounded and respectful of real-world open source practices.\r\n\r\nWhat participants will learn:\r\n\r\n1. Understanding an Open Source Python Project\r\n- How to quickly navigate an unfamiliar Python repository\r\n- Reading project structure, tests, and documentation\r\n- Understanding contribution guidelines and expectations\r\n\r\n2. Open Source Workflow in Practice\r\n- Forking and cloning a repository\r\n- Creating a local development environment\r\n- Working with branches and commits\r\n\r\n3. Making a First Contribution\r\n- Working on a well-scoped, beginner-friendly issue\r\n- Writing or updating Python code, tests, or documentation\r\n- Running tests locally and validating changes\r\n\r\n4. Opening a Pull Request\r\n- Writing a clear and respectful pull request description\r\n- Understanding automated checks (CI)\r\n- Responding to maintainers\u2019 feedback\r\n\r\n5. Contributing Sustainably\r\n- How to continue contributing after the workshop\r\n- Common mistakes to avoid\r\n- How open source communities scale through good engineering and collaboration\r\n\r\nAll tutorial tasks are carefully scoped and prepared in advance to ensure a smooth experience within the 90-minute timeframe. 
Participants will leave with a forked repository, a commit, and a pull request opened or ready, as well as the confidence to contribute to other Python open source projects.\r\n\r\nWhy ScanAPI?\r\n\r\nScanAPI is a production-grade Python library distributed via PyPI and maintained in the open. It has been recognized by GitHub as part of initiatives focused on securing the open source supply chain, making it an excellent real-world example of sustainable Python open source development. The project is supported by the Cumbuca Dev open source community, which focuses on building inclusive, contributor-friendly environments through strong engineering practices.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GXBKTT", "name": "Camila Maia", "avatar": "https://pretalx.com/media/avatars/GXBKTT_X3YRK7p.webp", "biography": "Brazilian software engineer, open source maintainer, and co-founder of Cumbuca Dev, a community-driven initiative that supports underrepresented people entering and thriving in technology through real-world practice, open source collaboration, and education. With over a decade of professional experience, Camila focuses on backend engineering, developer experience, tooling and automation.\r\n\r\nShe is the creator and core maintainer of ScanAPI, a Python library for automated API integration testing and live documentation that has gathered widespread adoption and community contributions. ScanAPI has been recognized by GitHub as part of initiatives to strengthen the open source supply chain and is used by developers internationally. Camila\u2019s work spans not only code but also documentation, automation pipelines, and contributor experience practices that make open source projects more sustainable.\r\n\r\nCamila was the first Brazilian accepted into the GitHub Sponsors program, breaking new ground for maintainers in her country. 
She is also featured as one of ~50 global open source maintainers in the maintane.rs project, invited by the Open Source Initiative (OSI) to share her personal journey and perspectives on how open source can unlock opportunities in tech.\r\n\r\nHer engagement extends to speaking and mentoring at technical conferences around the world, including Pyjamas, EuroPython, Python Brasil, DjangoCon EU, and others, where she has presented both talks and hands-on workshops. \r\n\r\nThrough Cumbuca Dev, Camila advocates for practical learning and structured contributions as pathways to real experience, helping people from diverse backgrounds build skills, confidence, and visibility before their first job. She believes that open source is not just code \u2014 it is a vehicle for community, opportunity, and empowerment \u2014 and her work reflects a commitment to making technology spaces more accessible, collaborative, and humane.\r\n\r\nPeople > Tech \ud83d\udc9c", "public_name": "Camila Maia", "guid": "d72f6d10-da88-5169-9be6-9141a3a00fba", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GXBKTT/"}], "links": [{"title": "canva link", "url": "https://canva.link/wtdsbs7m7ndjux6", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KKCYJN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KKCYJN/", "attachments": []}, {"guid": "e0418ef6-1b87-54e4-8045-7f9c171c52f4", "code": "ZYUJH3", "id": 87062, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87062-how-to-search-through-800-billion-records-in-real-time", "url": "https://pretalx.com/pyconde-pydata-2026/talk/ZYUJH3/", "title": "How to Search Through 800 Billion Records in Real Time", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "Large-scale distributed systems rarely produce clean data streams. 
In practice, hundreds of services continuously emit overlapping updates, retries, corrections, and partial state. Turning that constant stream of noisy events into a reliable, searchable dataset in real time, while processing hundreds of billions of records per day, requires careful architectural choices. \r\n\r\nThis talk shares practical lessons from building a Kafka-based ETL pipeline that transforms massive volumes of events into a coherent dataset suitable for real-time search. After a brief overview of the system architecture, we focus on several key techniques: reducing redundant processing through key deduplication and short-lived buffers, defining when messages can be safely acknowledged without risking data loss, and keeping long-running ETL services healthy under heavy Kafka workloads.\r\n\r\nThe session emphasizes concrete engineering trade-offs and operational realities rather than theory. Attendees will leave with practical patterns for building more reliable and efficient streaming pipelines.", "description": "Large-scale distributed systems rarely produce clean data streams. In practice, hundreds of services continuously emit overlapping updates, retries, corrections, and partial state. Turning that constant stream of noisy events into a reliable, searchable dataset in real time, while processing hundreds of billions of records per day, requires careful architectural choices. \r\n\r\nThis talk shares practical lessons from building a Kafka-based ETL pipeline that transforms massive volumes of events into a coherent dataset suitable for real-time search. 
After a brief overview of the system architecture, we focus on several key techniques: reducing redundant processing through key deduplication and short-lived buffers, defining when messages can be safely acknowledged without risking data loss, and keeping long-running ETL services healthy under heavy Kafka workloads.\r\n\r\nThe session emphasizes concrete engineering trade-offs and operational realities rather than theory. Attendees will leave with practical patterns for building more reliable and efficient streaming pipelines.", "recording_license": "", "do_not_record": false, "persons": [{"code": "P9UQXL", "name": "Mirano Tuk", "avatar": "https://pretalx.com/media/avatars/P9UQXL_NIqL5Sq.webp", "biography": "Principal Software Engineer at ReversingLabs, working on large-scale distributed systems and data-intensive architectures.\r\n\r\nI design and operate high-throughput, real-time pipelines, with an emphasis on reliability, observability, and performance in real-world conditions, and a practical approach to engineering trade-offs and system failures.", "public_name": "Mirano Tuk", "guid": "b164ed27-02a3-5eb7-a8f6-31611a178343", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/P9UQXL/"}, {"code": "U8YKKZ", "name": "Filip Bacic", "avatar": "https://pretalx.com/media/avatars/U8YKKZ_7BDCFRM.webp", "biography": "Software Development Manager at ReversingLabs, leading teams responsible for large-scale data processing, data quality, and technical writing. 
Specialized in turning complex systems into something that works, produces correct results, and is documented well enough that someone else can understand it, usually in that order.", "public_name": "Filip Bacic", "guid": "15e69612-eabd-514e-b191-2887abceed7b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/U8YKKZ/"}], "links": [{"title": "Slides", "url": "https://pycon.tuk.hr", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZYUJH3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZYUJH3/", "attachments": []}, {"guid": "86676f32-6ae8-5286-b1ca-9cc8a5b8251c", "code": "BAXEXY", "id": 88387, "logo": null, "date": "2026-04-14T17:10:00+02:00", "start": "17:10", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-88387-agent-based-hyperparameter-optimization-for-gradient-boosted-trees", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BAXEXY/", "title": "Agent-Based Hyperparameter Optimization for Gradient Boosted Trees", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "### Teaching an LLM to Tune GBDT \u2014 and Beyond\r\n\r\nHyperparameter optimization for gradient boosted tree models is a repetitive yet cognitively demanding task. Practitioners must combine statistical intuition with detailed, library-specific knowledge\u2014often buried across hundreds of pages of documentation for tools such as XGBoost, LightGBM, or CatBoost. 
As models and configurations grow in complexity, traditional approaches like grid search, random search, or even Bayesian optimization struggle to incorporate semantic understanding of model behavior.\r\n\r\nUsing LGBM as a concrete case study, I demonstrate how MCP and skills-powered agents, orchestrated in a structured workflow, can analyze model behavior and propose targeted hyperparameter adjustments grounded in both theory and library-specific constraints.", "description": "### Why This Problem Matters in Practice\r\n                                                                    \r\n  Hyperparameter tuning consumes a disproportionate amount of experimentation time, yet most tuning failures stem from recurring structural issues \u2014 not random chance. Experienced practitioners can spot these patterns, but automated optimizers only see scalar objective values.\r\n                                                                                                                                                               \r\n###  What Is New or Different                                                                                                                                     \r\n                                                                                                                                                               \r\n  This work reframes hyperparameter optimization as an iterative reasoning process rather than a pure search problem. Intermediate diagnostic artifacts (parameter importance, generalization gaps, plateau signals) become first-class inputs that guide subsequent decisions. Encoding this reasoning via agents enables systematic reuse of expert heuristics that are otherwise applied informally.                                                                         
\r\n                  \r\n\r\n###  Scope and Limitations\r\n\r\nThe case study uses LightGBM as the sample demo, but the architecture is generic and can be applied to any ML model. The talk explicitly discusses scenarios where agent-based optimization adds limited value or introduces unnecessary complexity.                                                                                                              \r\n                  \r\n###  Audience Takeaways\r\n\r\n  Attendees will gain:\r\n  - A blueprint for putting an LLM in any decision loop with guardrails\r\n  - If you do ML: a new way to think about HPO                         \r\n  - If you don't: a reusable pattern for agent-driven automation", "recording_license": "", "do_not_record": false, "persons": [{"code": "QVC73Q", "name": "Huijo Kim", "avatar": "https://pretalx.com/media/avatars/QVC73Q_Rg5nco5.webp", "biography": "I am a machine learning practitioner and former founder working across predictive modeling, computer vision, MLOps, and autonomous systems. After studying mechanical engineering, I worked in the electric vehicle development sector at **Hyundai Motor Group**, contributing to large-scale, safety-critical automotive systems.\r\n\r\nI later founded and scaled an [**agtech startup**](http://hexafarms.com) from zero to a six-figure ARR business. This experience shaped my focus on building technology that delivers measurable, real-world value rather than chasing technical hype. After exiting, I transitioned into the **e-commerce domain**, applying machine learning to large-scale experimentation and operational optimization.\r\n\r\nMy background includes graduate research in robotics, published work in applied machine learning, and hands-on experience deploying end-to-end ML systems. I am particularly interested in explainability-driven optimization, agent-based workflows, and cross-disciplinary system design. 
I believe polymath practitioners\u2014those who can bridge domains\u2014will be especially valuable in the era of AI.", "public_name": "Huijo Kim", "guid": "dfaf3a39-a2b0-5635-858f-a4ddd89a4326", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/QVC73Q/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BAXEXY/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BAXEXY/", "attachments": [{"title": "presentation", "url": "/media/pyconde-pydata-2026/submissions/BAXEXY/resources/presen_NWtE9XT.pdf", "type": "related"}]}], "Dynamicum [Ground Floor]": [{"guid": "1958ca6d-a37a-5942-91e9-5476fa9a04b9", "code": "AF9DNH", "id": 88431, "logo": null, "date": "2026-04-14T11:45:00+02:00", "start": "11:45", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-88431-sql-is-dead-long-live-sql-engineering-reliable-analytics-agent-from-scratch", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AF9DNH/", "title": "SQL is Dead, Long Live SQL: Engineering reliable analytics agent from scratch", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Tutorial", "language": "en", "abstract": "Is it still worth learning SQL in 2026, or can we just \"chat\" with our data? This hands-on tutorial explores that exact question by pushing Text-to-SQL to its absolute limits. This won't be just happy paths; we will deliberately expose where LLMs fail : ambiguity, hallucinations, and \"dirty\" data...and build the engineering stack required to fix them!\r\n\r\nYou will build a local data Agent from scratch using DuckDB, MCP and a minimalist semantic layer. By the end, you will understand the hard boundaries of AI reasoning, how a semantic layer acts as a safety net, and why knowing SQL is still (since 1974) the most critical skill for building reliable analytics agents.", "description": "This session is a \"reality check\" for AI analytics. 
We combine theory with engineering to answer one question: Where are the limits of Text-to-SQL? Participants will experience the frustration of a hallucinating LLMs and the satisfaction of fixing it with a realistic minimalist local setup.\r\n\r\nLearning objectives:\r\n1. Map the limits: Identify exactly where LLMs break (e.g., complex joins, specific business logic, non-standard schemas).\r\n2. Bridge the gap: Learn how a semantic layer translates fuzzy English into deterministic SQL.\r\n3. Modern architecture: Overview and hands-on on DuckDB Model Context Protocol (MCP) to give agents standard, safe tools to do analytics.\r\n4. The verdict: Understand why SQL is becoming the \"Assembly Language\" of the AI era, and why you still need to be fluent in it and what is still missing to just \"chat with our data\".\r\n\r\nPrerequisites:\r\n- Laptop with Python 3.10+.\r\n- Beginner SQL knowledge (joins, aggregations).\r\n- No prior AI/LLM experience required.", "recording_license": "", "do_not_record": false, "persons": [{"code": "MPYCX8", "name": "Mehdi Ouazza", "avatar": "https://pretalx.com/media/avatars/MPYCX8_yYr02Wg.webp", "biography": "I started my career in data 10+ years ago as a data engineer, working in large corporates like AXA setting up on-prem Spark clusters (yes, that old!) to tech unicorns building data platforms in the cloud at Klarna, Back Market, and Trade Republic.\r\n\r\nOver the years, I found a passion for sharing what I learned and teaching others. It became my full-time job when I joined as the first DevRel at MotherDuck (DuckDB in the cloud) in 2023.\r\n\r\nI believe learning should be fun. 
I enjoy making complex topics more approachable through storytelling and creativity.\r\n\r\nI want to keep teaching curious students (in-person and online) and help the next generation learn not just data, but software engineering in this post-AI world.", "public_name": "Mehdi Ouazza", "guid": "82c1138f-768a-5981-8a46-b4c95f5f852a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/MPYCX8/"}, {"code": "VHNV7X", "name": "Dumky de Wilde", "avatar": "https://pretalx.com/media/avatars/JKPLLF_uiUfBcc.webp", "biography": "I spent over 10 years as a consultant setting up data pipelines, data models, and cloud infrastructure for clients ranging from government to fintech to retail and energy, before joining MotherDuck to help people and their AI agents make the most of the platform through documentation, examples, and other content.\r\n\r\nI am the co-author of The Fundamentals of Analytics Engineering, and I love writing about all things data \u2014 both at MotherDuck and on my personal blog at dumky.net.", "public_name": "Dumky de Wilde", "guid": "17e5698e-91bf-534c-b60f-cd4a800fd3e5", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VHNV7X/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AF9DNH/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AF9DNH/", "attachments": []}, {"guid": "f1cb0c82-d9a2-5612-a5dc-f731f16c83a9", "code": "9ZKYRD", "id": 86216, "logo": null, "date": "2026-04-14T14:30:00+02:00", "start": "14:30", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-86216-a-minimalist-introduction-to-ansible", "url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZKYRD/", "title": "A minimalist introduction to Ansible", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Tutorial", "language": "en", "abstract": "[Ansible](https://docs.ansible.com/) is a popular [infrastructure as code](https://en.wikipedia.org/wiki/Infrastructure_as_code) tool for server 
configuration and software deployment. This tutorial will cover things that I wish the first day that I started using Ansible to manage the projects at my work.", "description": "[Ansible](https://docs.ansible.com/) is a popular Python package for declarative configuration of servers that includes batteries (for example, encrypted vault for secrets and Jinja template engine). As a Swiss Army knife, Ansible is capable of solving my problems but come with many features that novices will not know how to use. This tutorial is hands-on and will guide attendees to learn the core features of Ansible. Attendees must have Podman or Docker installed in the machine they will use during the tutorial.", "recording_license": "", "do_not_record": false, "persons": [{"code": "DVTDFJ", "name": "Raniere Silva", "avatar": "https://pretalx.com/media/avatars/DVTDFJ_uIE0u3z.webp", "biography": "I'm a Research Software Engineer helping social scientists to have their work reproducible. I'm a former The Carpentries instructor and content creator.", "public_name": "Raniere Silva", "guid": "fa8cf085-eb53-5d14-9acf-8f5ff2bd311c", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/DVTDFJ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZKYRD/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZKYRD/", "attachments": [{"title": "Slides used during the presentation", "url": "/media/pyconde-pydata-2026/submissions/9ZKYRD/resources/A_mini_yjUhTQr.pdf", "type": "related"}, {"title": "Files for sandbox used during the tutorial", "url": "/media/pyconde-pydata-2026/submissions/9ZKYRD/resources/sandbo_IXO74yK.zip", "type": "related"}]}, {"guid": "34cf9023-ab9b-59d8-be74-bece48192fd1", "code": "3JLSEF", "id": 85009, "logo": null, "date": "2026-04-14T16:30:00+02:00", "start": "16:30", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-85009-catch-the-llm-if-you-can-watermarking-llms", "url": 
"https://pretalx.com/pyconde-pydata-2026/talk/3JLSEF/", "title": "Catch the LLM if you Can: Watermarking LLMs", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Talk", "language": "en", "abstract": "With Large Language Models (LLMs), generating high-quality text and images is easy and so is\r\nmisusing it. As AI-generated content becomes harder to distinguish from human generated content,\r\ndevelopers are increasingly asking: How can we verify whether a piece of text comes from an LLM?\r\nWe\u2019ll explore Python\u2019s simplicity and rich ecosystem of libraries to solve this problem.\r\n\r\nThis talk introduces the foundations of LLM watermarking and shows how developers can implement\r\nthese techniques entirely in Python. We\u2019ll discuss two core approaches, EXP sampling method and\r\nKGW method. We will go through the implementation of the KGW method using simple,\r\ntransparent code, and compare it with the EXP approach. There's no need for a large model or a GPU\r\ncluster to understand how these systems work and the core ideas can be implemented in pure\r\nPython using simple code. The code repositories, which includes both methods will be provided so\r\nthat the attendees can follow along.\r\n\r\nAlong the way, we\u2019ll discuss the trade-offs and the limitations of current research. And for those\r\nwondering, \u201cDo I have to implement all this myself?\u201d, the talk concludes with a quick overview of MarkLLM, an existing open-source toolkit that provides a unified Python interface for experimenting with watermarking algorithms.\r\n\r\nAttendees will leave with a clear understanding of how watermarking works, when it\u2019s useful, and\r\nhow to integrate these techniques into real-world Python projects.", "description": "During the talk we will cover:\r\n1. 
Why Watermarking Matters?\r\n     - What can go wrong when AI-generated content becomes indistinguishable from human writing\r\n     -  Why provenance and transparency are becoming essential to trust and safety.\r\n2. How LLM Watermarking Works?\r\n     - What is a watermark and what isn't\r\n     - The core idea behind statistical watermarking\r\n3. Two Key Algorithms implemented using Python's established frameworks\r\n     - EXP Watermark: modifying logits with pseudo-random perturbations.\r\n     - KGW Green-List Watermark: partitioning tokens into \u201cgreen\u201d and \u201cred\u201d lists to bias sampling.\r\n     - Python implementation of the KGW method and comparing it with the EXP method.\r\n4.  How you can use MarkLLM (open-source toolkit)\r\n     - How to use the toolkit for experiments in your own workflows.\r\n5. Real-World Challenges and Limitations\r\n     - How robust and evasive are the current algorithms\r\n\r\nKey Takeaways:\r\n     - Watermarking is a promising tool for provenance.\r\n     - Understanding these methods helps build more transparent and trustworthy AI systems.\r\nThis talk is for people who:  \r\n   - Care about ethics and privacy in AI and want to understand what watermarking can (and cannot)  solve.\r\n   - Build applications using LLMs and want mechanisms for verifying generated text.\r\n   - Are ML researchers or hobbyists interested in how watermarking algorithms function at a technical level.\r\n   - Work in AI safety, trust & transparency, or responsible AI and need practical tools for content provenance.\r\n\r\nNote: No prior experience with LLM architecture is required, basic familiarity with probability is recommended; no advanced math needed.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EYPGPQ", "name": "Subhosri Basu", "avatar": "https://pretalx.com/media/avatars/EYPGPQ_FTxVyf6.webp", "biography": "I am a GenAI researcher at Fraunhofer Institute, Germany. 
Born in India, I decided to move to Germany in search of new challenges. My professional journey has been shaped by a passion to solve problems in various domains. Academically, I have graduated with a Master's degree from the department of electrical and computer science. My focus has always been around statistics. I have been able to work on projects related to artificial intelligence and deep learning, especially in the field of signal processing and imaging. With my experience, I want to guide the growth of next generation of ML researcher. When I am not working, you will find me exploring Europe.", "public_name": "Subhosri Basu", "guid": "3b11da48-c601-5c3c-bf80-9cca411698fb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EYPGPQ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3JLSEF/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3JLSEF/", "attachments": [{"title": "Slides for the talk", "url": "/media/pyconde-pydata-2026/submissions/3JLSEF/resources/Catch__s9gCThO.pdf", "type": "related"}]}, {"guid": "16e8bda2-1e83-5739-b6b1-34d8b66426c4", "code": "7JXYKH", "id": 84979, "logo": null, "date": "2026-04-14T17:10:00+02:00", "start": "17:10", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-84979-offline-fallback-for-a-mobile-lorawan-gateway", "url": "https://pretalx.com/pyconde-pydata-2026/talk/7JXYKH/", "title": "Offline Fallback for a Mobile LoRaWAN Gateway", "subtitle": "", "track": "General: Infrastructure - Hardware & Cloud", "type": "Talk", "language": "en", "abstract": "LoRaWAN gateways typically depend on cloud-based network servers, creating a vulnerability during internet outages. 
This talk presents a hybrid solution: a Raspberry Pi-based mobile gateway that operates on The Things Stack Sandbox while simultaneously decoding all device messages locally.\r\n\r\nThe system leverages existing network infrastructure for broad coverage during normal operation, while maintaining full local data access when connectivity fails. This is particularly valuable for emergency response scenarios and remote monitoring where sensor data must remain available regardless of network conditions.\r\n\r\nThe implementation uses Python for gateway orchestration and API integration, while incorporating existing JavaScript libraries (`lora-packet` and device decoders) for LoRaWAN decryption and payload decoding. Data is stored locally in SQLite for reliability and easy access.", "description": "LoRaWAN (Long Range Wide Area Network) is widely used for IoT sensor deployments due to its long range and low power consumption. Operating at 868MHz across Europe, it's ideal for remote monitoring applications\u2014from water level sensors to asset tracking and personnel location systems. However, traditional LoRaWAN deployments rely on cloud-based network servers, making them vulnerable to internet outages.\r\n\r\n**The Challenge**\r\n\r\nWhile networks like The Things Stack provide good geographic coverage, gaps remain\u2014particularly in remote areas where emergency response units operate. A mobile gateway can close these gaps, but standard configurations still require internet connectivity. You could deploy a completely local network with your own network server, but this sacrifices the existing infrastructure's coverage. \r\n\r\n**The Solution**\r\n\r\nThis talk presents a hybrid architecture that combines cloud-based operation with local resilience. The system primarily operates through The Things Stack Sandbox, leveraging its network coverage. Simultaneously, a Raspberry Pi-based mobile gateway decodes all messages from your devices locally in parallel. 
During normal operation, you benefit from cloud features. When internet connectivity fails, your sensor data remains accessible locally on the gateway.\r\n\r\n**Technical Implementation**\r\n\r\nThe solution consists of:\r\n\r\n1. **Raspberry Pi Gateway**: Configured as a mobile LoRaWAN gateway for The Things Stack Sandbox, suitable for vehicle deployment\r\n2. **Session Key Management**: Python service retrieving session keys for your devices via The Things Stack API\r\n3. **Local Message Processing**: Real-time decryption and decoding of LoRaWAN messages without internet dependency\r\n4. **Data Storage**: SQLite-based local storage for reliable data persistence\r\n\r\n**Python and JavaScript Integration**\r\n\r\nThe core implementation uses Python for gateway orchestration, API integration, and data management. For LoRaWAN encryption/decryption and payload decoding, the system leverages existing JavaScript libraries\u2014specifically `lora-packet` and community-maintained device decoders. This talk demonstrates practical patterns for Python/JavaScript interoperability.\r\n\r\n**Real-World Context**\r\n\r\nDrawing from volunteer emergency response experience, this solution addresses operational requirements where sensor data must remain available regardless of infrastructure status. 
The system ensures continuity of critical information during incidents.\r\n\r\n**What You'll Learn**\r\n\r\n- Designing resilient edge computing architectures for IoT\r\n- Integrating Python with JavaScript libraries\r\n- LoRaWAN security fundamentals (session keys, encryption)\r\n- Building offline-first systems with SQLite\r\n- API integration with The Things Stack\r\n\r\n**Open Source**\r\n\r\nComplete implementation available on GitHub, providing a reproducible setup valuable for volunteer organizations, research projects, and scenarios requiring IoT infrastructure that remains operational during connectivity disruptions.\r\n\r\n**Target Audience**\r\n\r\nPython developers interested in IoT and edge computing. No prior LoRaWAN experience required.", "recording_license": "", "do_not_record": false, "persons": [{"code": "KSM737", "name": "Jannis L\u00fcbbe", "avatar": null, "biography": "2008 \r\nM.Sc. Physics and Computer Science at Osnabr\u00fcck University\r\n\r\n2012 \r\nPhD in Physics at Osnabr\u00fcck University\r\n\r\n2013 - now \r\nSensor Developer at ROSEN Group\r\n\r\n2000 - now \r\nVolunteer operative in the Federal Agency for Technical Relief (THW, Germany)", "public_name": "Jannis L\u00fcbbe", "guid": "5f0a198b-8135-5bd0-a9c6-203cc457e65f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/KSM737/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/7JXYKH/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/7JXYKH/", "attachments": []}]}}, {"index": 2, "date": "2026-04-15", "day_start": "2026-04-15T04:00:00+02:00", "day_end": "2026-04-16T03:59:00+02:00", "rooms": {"Merck Plenary (Spectrum) [1st Floor]": [{"guid": "3e68a03f-9626-5ea2-ad36-9f2fa7b346f0", "code": "CMDHUN", "id": 89403, "logo": null, "date": "2026-04-15T09:05:00+02:00", "start": "09:05", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": 
"pyconde-pydata-2026-89403-honey-i-vibe-coded-some-crypto-security-in-the-age-of-llms", "url": "https://pretalx.com/pyconde-pydata-2026/talk/CMDHUN/", "title": "\"Honey, I vibe coded some crypto\" - Security in the age of LLMS", "subtitle": "", "track": "Keynote", "type": "Keynote", "language": "en", "abstract": "What only a few years ago started out as smart tab completion turned into a way of working in which a growing number of programmers don't even bother to open up an IDE anymore. Let's take a moment to contemplate the changing nature of software engineering as a profession, and to explore chances to avoid looming disaster.", "description": "What only a few years ago started out as smart tab completion turned into a way of working in which a growing number of programmers don't even bother to open up an IDE anymore. Let's take a moment to contemplate the changing nature of software engineering as a profession, and to explore chances to avoid looming disaster. \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e \u200e", "recording_license": "", "do_not_record": false, "persons": [{"code": "RNBVEV", "name": "Gabriela Bogk", "avatar": "https://pretalx.com/media/avatars/RNBVEV_dJ0Vxj5.webp", "biography": "It's been more than 40 years since Gabriela first touched a computer keyboard. Becoming a hacker at a young age out of necessity, it's not like you could buy computer games in East Germany, she learned how copy protection schemes work, setting the foundation for a lifelong passion for a deep understanding of computers and getting them to do things they weren't supposed to do. The passion turned into a career of 30 years in tech, more than 20 of them in information security. 
She's been active in numerous roles at the Chaos Computer Club over the years, and after a colorful career in many roles is currently earning a living as CISO of mobile.de - selling used cars is a very ethical path, considering all the options in cyber security. But it's not just security, Gabriela also has a passion for programming languages, having been the core maintainer of open source Dylan compilers for many years, even being paid for maintenance of a Lisp compiler for a while. But if a job needs to be done, more often than not she reaches out for Python to this day.", "public_name": "Gabriela Bogk", "guid": "285ba729-6cb5-5aae-9a72-ab69373b751f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/RNBVEV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/CMDHUN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/CMDHUN/", "attachments": []}, {"guid": "a3c31f9c-68c9-5ff7-ac1b-9d852fb604be", "code": "BLC7FS", "id": 87636, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-87636-demystifying-containers-with-python-building-a-minimal-engine-from-scratch", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BLC7FS/", "title": "Demystifying Containers with Python: Building a Minimal Engine from Scratch", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Containers are a fundamental part of the modern developer's toolkit, yet they are frequently misunderstood and described as \"lightweight virtual machines.\" This talk demystifies containerization by building a functional, minimal engine from scratch using only the python standard library. We will step away from high-level tools like docker to explore how the linux kernel provides isolation through features like `namespaces` and `chroot`. 
Using a hands-on approach, we will demonstrate how to set up a sandboxed environment, isolate a filesystem, and execute processes within it. This session is designed for developers who use containers daily but haven't yet had the opportunity to look under the hood or explore the underlying operating system principles. By implementing a simplified version of these tools, you will gain a clearer, more practical understanding of the core mechanics that make containerization possible.", "description": "In modern software development, containers have become a standard tool for deploying code. However, they are frequently misunderstood and described as \"lightweight virtual machines.\" For many developers - especially those transitioning from academia, like myself - the layer between their python code and the operating system kernel is often overlooked. This talk is based on the idea that the best way to understand a concept is to implement it in its simplest form. By bypassing the complexity of modern container orchestrators, we can focus on the fundamental system calls that make isolation possible.\r\n\r\nDuring the session, we will demonstrate the core mechanics of containerization by building a minimal engine in python. We will begin by preparing a root filesystem to show what a container image actually is at its most basic level. We will implement isolation using the `os.chroot()` function to trap a process in a specific directory and will talk about linux namespaces, which isolate what a process can see, and `cgroups`, which limit how much of the hardware resources a process can use.\r\n\r\nThe main takeaways of this talk include a clear technical distinction between virtual machines and containers and the realization that a container is essentially a process with a restricted view of the host system. 
You will gain practical knowledge of the `os` module for system-level tasks and the confidence to explore low-level computer science concepts by implementing them in python. By the end of this session, you will have a practical understanding of the basic principles that make containerization possible.", "recording_license": "", "do_not_record": false, "persons": [{"code": "U3VPD3", "name": "Alexander Zaytsev", "avatar": "https://pretalx.com/media/avatars/U3VPD3_vCbEueI.webp", "biography": "I am a data engineer at Blue Yonder, where I build infrastructure for large-scale demand forecasting solutions. My career spans nearly a decade in academia, high-tech R&D, and industry, during which python has been a constant tool across a wide variety of environments. During my PhD in physics, I used python to analyze time-series data from some of the world\u2019s most precise quantum sensors in the search for dark matter. Earlier in my career, I applied python to data analysis and modeling in high-precision laser gyroscope R&D, and today I continue to use it to develop robust, production-grade machine learning systems.", "public_name": "Alexander Zaytsev", "guid": "6f976f5d-26a0-54e3-bb3a-ee77406ca892", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/U3VPD3/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BLC7FS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BLC7FS/", "attachments": [{"title": "Presentation slides pptx", "url": "/media/pyconde-pydata-2026/submissions/BLC7FS/resources/slide_7WonNnh.pptx", "type": "related"}]}, {"guid": "20b32a3d-dffb-5767-8646-ca99c61b3334", "code": "GBKUNF", "id": 86540, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-86540-how-to-create-effective-data-visualizations", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GBKUNF/", "title": "How to create effective 
data visualizations", "subtitle": "", "track": "PyData: Visualisation & Notebooks", "type": "Talk", "language": "en", "abstract": "What distinguishes a lousy plot from a beautiful chart that communicates insights effectively? This talk will show you the underlying principles of good data visualization, offer lots of practical tips and tricks and give an overview of the data visualization landscape in Python. \r\nAfter the talk, you will be able to create better charts, whether for exploring your own data or for communicating results to others.", "description": "In this talk, you will learn about:\r\n\r\n- **Fundamental principles** of data visualization\r\n    - The Grammar of Graphics\r\n    - Visual hierarchy\r\n    - Data storytelling\r\n- **Best practices** regarding:\r\n    - Which colors to use\r\n    - Visual comparability \r\n    - Pros/cons of several chart types\r\n    - Context and audience: Adding text and annotations\r\n- The **data visualization landscape in Python**\r\n    - What libraries exist: matplotlib, plotly, altair etc., including add-ons and lesser-known ones\r\n    - What are their differences and strengths?\r\n    - Which library is suited for which usecase?\r\n\r\n\r\nEquipped with the knowledge presented in this talk, you will understand why certain charts are more aesthetically pleasing and more effective at conveying information than others. Apply the shown principles, take into account best practices and choose the right tools in Python to create more beautiful and impactful data visualizations.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LCLSQC", "name": "Dominik Haitz", "avatar": "https://pretalx.com/media/avatars/LCLSQC_TRgRWYH.webp", "biography": "Dominik is a Senior Data Scientist with multiple years of experience in various industries. 
Enthusiastic about data and technology, he creates solutions that deliver real business value.", "public_name": "Dominik Haitz", "guid": "0cb24709-d058-52fb-8b03-f3ab998b2f12", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LCLSQC/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GBKUNF/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GBKUNF/", "attachments": [{"title": "Talk Slides", "url": "/media/pyconde-pydata-2026/submissions/GBKUNF/resources/2026-0_XktrNNS.pdf", "type": "related"}]}, {"guid": "fc98e299-5038-54f4-aae4-c7ad0a52b461", "code": "PMMEAG", "id": 87685, "logo": null, "date": "2026-04-15T11:35:00+02:00", "start": "11:35", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-87685-production-ml-across-2015-2035-a-journey-to-the-past-and-the-future", "url": "https://pretalx.com/pyconde-pydata-2026/talk/PMMEAG/", "title": "Production ML across 2015-2035: A Journey to the Past and the Future", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk", "language": "en", "abstract": "This talk is an exciting journey that revisits the past decade of Production Machine Learning from 2015 until now, and provides a pragmatic outlook of the next decade towards 2035. We\u2019ll revisit some of the cornerstone python projects that served as the foundation of the \"messy innovation\" boom (feature stores, orchestration, model serving, monitoring), as well as how it transitioned towards the LLMOps era shifting the stack from training-centric to inference-centric. 
We will also provide a pragmatic set of predictions for the next decade of MLOps, including some of the trends in ML monitoring, agentic systems and beyond - this will provide actionable guidance to all practitioners to ensure we stay ahead of the curve on the expected skills and domains required to thrive in the near future to come.", "description": "# Outline\r\n\r\n1) Motivations;\r\n2) MLOps Foundations;\r\n3.1) The Past - 2015 - Genesis;\r\n3.2) The Past - 2018 - Messy Innovation;\r\n3.3) The Past - 2023 - LLMOps;\r\n4) The Future - 2025-2035 Outlook;\r\n5) Reflections.\r\n\r\n# Description\r\n\r\nThe lifecycle of a machine learning model only begins once it\u2019s in production. In this talk we take a practical journey through the last decade of production ML, tracing back the early beginnings of MLOps to the respective research and projects that helped drive the movement forward. We cover how the ecosystem went through explosive growth through COVID with a broad range of tools and vendors tackling similar problems in very different ways. We then talk about the most recent trends in LLMOps which has shifted the stack from training-centric to inference-centric as pre-trained models have become broadly available. Namely on how the locus of engineering moves to the application layer (ie inference time), introducing new artifacts such as prompts, vector databases, and tool metadata, and accelerating another wave of ecosystem heterogeneity. \r\n\r\nWith those lessons in place, we look forward to 2035 through a set of pragmatic milestones for consolidation and standardization: how monitoring and observability become more ubiquitous, how MLOps and LLMOps stacks align, how time-to-production compresses, and how operations gradually evolves toward more autonomous patterns (progressive rollouts, agent-assisted RCA, and early self-healing behaviors). 
\r\n\r\nFinally, we close with actionable guidance grounded in production reality: how to right-size platform complexity to organizational scale, where to invest early to reduce future operational debt, and how to increase the scale of ML delivery while actively reducing system complexity. Attendees should leave with a coherent mental model of the MLOps landscape, a sharper understanding of why production ML remains hard, and a concrete set of engineering priorities for building reliable ML systems through the next decade.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EQMGKH", "name": "Alejandro Saucedo", "avatar": "https://pretalx.com/media/avatars/EQMGKH_QNrxGhV.webp", "biography": "Alejandro is the Director of the Markets AI, Data & Platform at Zalando SE, where he is responsible for petabyte-scale AI & Data platforms that power the Pricing, Traffic and Trading technology across the group. He is also Scientific Advisor at the Institute for Ethical AI, where he has led contributions to EU policy, including the AI Act, the Data Act and the Digital Services Act, among others. 
Alejandro is currently appointed as AI Expert at the United Nations and the European Commission, and serves as Board Member at the ACM's Board of Directors.", "public_name": "Alejandro Saucedo", "guid": "034fd9f1-9dd4-517c-949e-81eabe2bc306", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EQMGKH/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/PMMEAG/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/PMMEAG/", "attachments": []}, {"guid": "a86981e3-91f4-54e8-890a-3cfb05662d70", "code": "LPUC9T", "id": 89413, "logo": null, "date": "2026-04-15T13:25:00+02:00", "start": "13:25", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-89413-the-multimodal-era-of-machine-learning-and-how-python-made-it-possible", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LPUC9T/", "title": "The Multimodal Era of Machine Learning (and How Python Made It Possible)", "subtitle": "", "track": "Keynote", "type": "Keynote", "language": "en", "abstract": "Multimodal learning - systems that combine vision, language, audio, and other sensory inputs\u2014has moved from a niche research topic to a central paradigm in modern machine learning. Today\u2019s most influential models no longer operate on a single modality but instead learn rich representations by combining language with images, videos, sound. This shift has fundamentally changed how we build, train, and evaluate current machine learning systems. Python has played a decisive role in this transformation. Acting as a unifying layer across modalities, Python enabled researchers and practitioners to seamlessly combine computer vision, natural language processing, and speech within a single ecosystem. Python-based frameworks lowered the barriers between research communities, and accelerated the rise of large-scale, weakly supervised, and foundation models. However, this success has also introduced new challenges. 
The ease of experimentation masks growing issues around scalability, reproducibility, and evaluation. Multimodal systems increasingly depend on complex Python-based stacks whose abstractions can obscure underlying assumptions and costs.\r\n...", "description": "Multimodal learning\u2014systems that combine vision, language, audio, and other sensory inputs\u2014has moved from a niche research topic to a central paradigm in modern machine learning. Today\u2019s most influential models no longer operate on a single modality but instead learn rich representations by combining language with images, videos, sound. This shift has fundamentally changed how we build, train, and evaluate current machine learning systems. Python has played a decisive role in this transformation. Acting as a unifying layer across modalities, Python enabled researchers and practitioners to seamlessly combine computer vision, natural language processing, and speech within a single ecosystem. Python-based frameworks lowered the barriers between research communities, and accelerated the rise of large-scale, weakly supervised, and foundation models. However, this success has also introduced new challenges. The ease of experimentation masks growing issues around scalability, reproducibility, and evaluation. 
Multimodal systems increasingly depend on complex Python-based stacks whose abstractions can obscure underlying assumptions and costs.\r\nThis keynote will reflect on the current state of multimodal learning, examine how Python shaped its trajectory, and critically discuss the technical and conceptual challenges that lie ahead aiming to provide a perspective on where machine learning in general and multimodal learning in particular is succeeding, where it is struggling, and what role the Python community can play in shaping its next phase.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NHKXFV", "name": "Hilde K\u00fchne", "avatar": "https://pretalx.com/media/avatars/NHKXFV_O7wmrg1.webp", "biography": "Prof. Dr. Hilde Kuehne is a Professor of Multimodal Learning at the T\u00fcbingen AI Center and an affiliated professor at the MIT\u2013IBM Watson AI Lab. Previously, she was a Professor of Computer Vision and Multimodal Learning at the University of Bonn. She received her PhD from the cv:hci lab at the Karlsruhe Institute of Technology (KIT), where she was supervised by Rainer Stiefelhagen, and subsequently held postdoctoral positions at Fraunhofer FKIE and in the Computer Vision Group led by Prof. J\u00fcrgen Gall.\r\nHer research focuses on video understanding, with a particular emphasis on learning without labels and multimodal video understanding. 
She has created several highly cited datasets and foundational works for analyzing large collections of untrimmed video data, including HMDB51, which was awarded both the ICCV 2021 Helmholtz Prize and the PAMI Mark Everingham Prize.", "public_name": "Hilde K\u00fchne", "guid": "02d3e781-800b-5fdb-aae1-919ce0817465", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NHKXFV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/LPUC9T/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/LPUC9T/", "attachments": [{"title": "Slides - The Multimodal Era of Machine Learning", "url": "/media/pyconde-pydata-2026/submissions/LPUC9T/resources/The_Mu_7JwabcM.pdf", "type": "related"}]}, {"guid": "4436b245-574a-51a2-8031-0c05afd83f1d", "code": "QBJRBJ", "id": 95713, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "01:00", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95713-pyladies-fireside-chat", "url": "https://pretalx.com/pyconde-pydata-2026/talk/QBJRBJ/", "title": "PyLadies Fireside Chat", "subtitle": "", "track": "General: Community & Diversity", "type": "Panel", "language": "en", "abstract": "What does it mean to build with Python when AI is reshaping everything? Join Dawn Gibson Wages, Jessica Greene, and host Tereza Iofciu for an honest conversation about Python, local AI, and the craft of being a developer today.", "description": "Join us for this fireside chat, where Tereza Iofciu sits down with Dawn Gibson Wages, community and DevRel lead at Anaconda with a passion for local-first AI and Python environments,  and Jessica Greene, Senior ML Engineer at Ecosia and PyLadies community lead, for a candid conversation about building with Python in the age of AI. 
Careers, craft, community, and the questions the hype tends to skip.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NMACLQ", "name": "Tereza Iofciu", "avatar": "https://pretalx.com/media/avatars/NMACLQ_7YAZwK6.webp", "biography": "Tereza Iofciu is a data and AI expert, leadership coach, and PSF Fellow with 15+ years of experience leading data and product teams at neuefische, FREE NOW, and New Work (XING). She helps professionals lead and adapt in the age of AI through her Data Diplomat Framework\u2122, bridging technical depth with human leadership.", "public_name": "Tereza Iofciu", "guid": "9f1c4db3-3e40-5e40-a06d-ad540d3a75fc", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NMACLQ/"}, {"code": "WS7BU8", "name": "Dawn Wages", "avatar": "https://pretalx.com/media/avatars/LDQPN9_BEpPpGr.webp", "biography": "Dawn Gibson Wages is a software engineer, ethical open source advocate, and community leader. She is the former Chair of the Python Software Foundation Board (volunteer) and currently works as Director of Community and Developer Relations at Anaconda. When she's not working in the Python ecosystem, she is watching Star Trek in Philadelphia with her wife and two dogs.", "public_name": "Dawn Wages", "guid": "a05c5500-4058-5c2e-90bb-c88f9129226a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/WS7BU8/"}, {"code": "HEGPKL", "name": "Jessica Greene (she/her)", "avatar": "https://pretalx.com/media/avatars/BWLW8F_VPWYIk2.webp", "biography": "Jessica Greene is a self/community-taught developer who came to tech by way of the film industry and speciality coffee roasting. She is now a Senior Machine Learning Engineer at Ecosia.org, where she explores how ML and generative AI can support climate action. 
Passionate about ethical, sustainable, and inclusive technology, Jessica co-leads PyLadies Berlin, serves on the board of the Python Software Verband (PySV), and is part of the Python Software Foundation\u2019s Conduct Working Group. In 2024, she was honoured with the inaugural Outstanding PyLadies Award and the PSF Community Service Award for her contributions to the Python ecosystem. Outside of coding, she knits, reads, enjoys travel, photography and spending time with her niece and nephew.", "public_name": "Jessica Greene (she/her)", "guid": "5c127147-5084-5334-87dd-194821b36b3a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HEGPKL/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/QBJRBJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/QBJRBJ/", "attachments": []}, {"guid": "eec67fa5-2cff-5565-9856-613643f409c2", "code": "CNNUZC", "id": 95697, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "01:10", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95697-start-ups-investors", "url": "https://pretalx.com/pyconde-pydata-2026/talk/CNNUZC/", "title": "Start-Ups & Investors", "subtitle": "", "track": null, "type": "Panel", "language": "en", "abstract": "Starting a company doesn't require a garage or an MBA \u2014 it takes a real problem, a strong team, and someone willing to turn a project into a product. This panel brings together founders, investors, and startup builders from academia, corporate careers, and venture capital for an honest conversation about what it really takes to build a startup in AI, software, and open source. \r\n\r\nAt PyCon DE & PyData, we're surrounded by people solving real problems with code and data every day. This session is for anyone who's ever wondered: could my project become a product? 
The answer might surprise you.", "description": "The Python and AI community is full of people who build tools, train models, and solve hard problems \u2014 but the leap from project to product often feels like a different world entirely. This panel closes that gap.\r\n\r\nFour women from very different backgrounds \u2014 a former SAP SVP turned startup investor, a TU Darmstadt researcher turned Forbes 30 Under 30 founder, a venture capital managing partner, and an AI startup ecosystem builder \u2014 share what founding and funding a company actually looks like. \r\n\r\nNo polished success stories, no pitching. Just real talk about first steps, financing, team building, and the support systems that exist but few people know about.\r\n\r\nWhy this panel at PyCon DE & PyData? Because the people in this room are exactly who Germany's AI and open-source startup ecosystem needs. You understand the technology. You work with data. You build things that work. \r\n\r\nWhat's often missing isn't the idea or the skill \u2014 it's the confidence, the network, and the knowledge of how to start. This panel provides all three.\r\n\r\nWe'll cover five themes: the spark that starts a founding journey, the reality behind startup clich\u00e9s, what technical founders need beyond code, how to find the right networks and funding, and concrete first steps anyone can take.\r\n\r\nThe panel is especially aimed at developers considering turning a side project into a startup, researchers exploring the path from paper to product, and professionals in industry wondering whether the leap from a corporate career is right for them. 
We also want to actively encourage more women to see themselves as founders \u2014 which is why representation on this stage matters.\r\n\r\nWhether you leave with a concrete next step, a new contact, or simply a more realistic picture of what founding looks like \u2014 this session is designed to make the startup world feel less like an exclusive club and more like a path that's open to you.", "recording_license": "", "do_not_record": false, "persons": [{"code": "93FF78", "name": "Ina Schlie", "avatar": "https://pretalx.com/media/avatars/78ZETH_bM0B20Q.webp", "biography": "Co-Founder, encourageventures, former Senior Vice President at SAP SE with over 20 years in the company. She serves and served on the supervisory boards of CMBlu, Heidelberger Druckmaschinen AG, W\u00fcrth, Uni Rat Konstanz, and q.beyond AG. She co-founded encourageventures e.V., an investor network dedicated to backing diverse founding teams and encouraging more women to become entrepreneurs.", "public_name": "Ina Schlie", "guid": "18677b6a-d09e-55d8-8138-b81b4e33abe8", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/93FF78/"}, {"code": "89EKYK", "name": "Carlina Bennison", "avatar": "https://pretalx.com/media/avatars/WNELMW_l6akoL8.webp", "biography": "Co-Lead AI Startup Rising, Hessian AI Co-leads the BMWE-funded \"AI Startup Rising\" program at hessian.AI. Previously built the SpeedUpSecure accelerator and advised startups on funding, business models, and IT security at TU Darmstadt.", "public_name": "Carlina Bennison", "guid": "231058a8-7314-5290-ba07-49dae2cd3a81", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/89EKYK/"}, {"code": "H9BQVG", "name": "Sara Jourdan", "avatar": "https://pretalx.com/media/avatars/7HT9AG_lYxlUDi.webp", "biography": "CEO & Co-Founder, Genow.ai Former postdoctoral researcher at TU Darmstadt. Forbes 30 Under 30 (2025). Co-founded Genow.ai, an AI platform that consolidates fragmented enterprise knowledge. 
Raised a 1.65M Euro seed round led by High-Tech Gr\u00fcnderfonds (HTGF).", "public_name": "Sara Jourdan", "guid": "76102715-788e-5311-aa4d-e7170fa22b31", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/H9BQVG/"}, {"code": "CVREN7", "name": "Jovana Walter", "avatar": "https://pretalx.com/media/avatars/WYNY9W_2E47JRx.webp", "biography": "Managing Partner, Futury Capital CFA with 12 years of experience in debt and equity. Previously at PwC and IKB Deutsche Industriebank. At Futury Capital she manages venture capital funds investing in technology-driven startups across Germany and Europe.", "public_name": "Jovana Walter", "guid": "0060ef95-2e54-5821-91e5-168c8a760189", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/CVREN7/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/CNNUZC/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/CNNUZC/", "attachments": []}, {"guid": "b1f50fc0-a014-5c77-8d6a-381f02f28504", "code": "UFGB7T", "id": 95768, "logo": null, "date": "2026-04-15T18:10:00+02:00", "start": "18:10", "duration": "01:15", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95768-lightning-talks-2", "url": "https://pretalx.com/pyconde-pydata-2026/talk/UFGB7T/", "title": "Lightning Talks 2", "subtitle": "", "track": null, "type": "Lightning Talks", "language": "en", "abstract": "Lightning Talks 2", "description": "Lightning Talks 2", "recording_license": "", "do_not_record": false, "persons": [], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/UFGB7T/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/UFGB7T/", "attachments": []}], "Titanium [2nd Floor]": [{"guid": "2b48e0ea-ab12-513e-86b5-5afa15bb36cb", "code": "V8DNCL", "id": 87288, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": 
"pyconde-pydata-2026-87288-wetterdienst-fast-unified-access-to-open-weather-data-with-polars", "url": "https://pretalx.com/pyconde-pydata-2026/talk/V8DNCL/", "title": "Wetterdienst: Fast, Unified Access to Open Weather Data with Polars", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "Weather and environmental data power analytics, ML, and operations\u2014but APIs differ wildly and data prep is slow. Wetterdienst is a Python library that provides a unified, Polars\u2011first interface to multiple weather services (DWD, ECCC, EA, NOAA/NWS, Geosphere Austria, IMGW, Eaufrance, WSV, and more). It standardizes request patterns, returns tidy (long) data, converts to SI units, handles caching, timezones (UTC by default), and retries\u2014so teams can focus on analysis instead of plumbing. This talk introduces Wetterdienst\u2019s provider architecture, core request patterns, performance practices with Polars, and how to integrate via Python, CLI, or its REST API. We\u2019ll walk through real examples (station discovery, parameter selection, timeseries retrieval), exporting to databases, and patterns for robust pipelines in ETL and ML.", "description": "## Problem\r\nAccessing weather data means wrestling with inconsistent APIs, formats, and units\u2014slowing down data engineering and making pipelines hard to reproduce.\r\n\r\n## Solution\r\nWetterdienst is a Python library providing a unified, Polars-first interface to multiple open weather services (DWD, ECCC, EA, NOAA/NWS, Geosphere Austria, IMGW, Eaufrance, WSV, and more). 
It standardizes request patterns, returns tidy long-format data in SI units, and handles caching, timezones, and retries\u2014so teams can focus on analysis instead of plumbing.\r\n\r\n## Core concepts:\r\n- **Polars-first** \u2014 All data operations use Polars (v1.15+); pandas supported for some I/O\r\n- **Declarative request pattern** \u2014 Provider \u2192 stations \u2192 values; tidy/long output by default\r\n- **Sensible defaults** \u2014 UTC timestamps, SI units, humanized parameter names\r\n- **Reliability** \u2014 Disk-based caching via diskcache, stamina-based retries, timezone handling\r\n- **Provider architecture** \u2014 Consistent interfaces across DWD, ECCC, EA, NOAA/NWS, Geosphere, IMGW, Eaufrance, WSV, and more\r\n- **Multiple interfaces** \u2014 Python API, CLI, and REST\r\n\r\n## Outline\r\n- Introduction\r\n- Journey \u2014 How Wetterdienst came to life\r\n- Wetterdienst \u2014 Architecture, concepts, and request patterns\r\n- Value \u2014 What wetterdienst offers you, me and everyone else\r\n- Demo \u2014 Live: station discovery, timeseries retrieval, station metadata, climate stripes and more via app\r\n\r\n## Target Audience\r\nData engineers, scientists, and platform teams who need reliable weather data for analytics, ML, and operations.\r\n\r\n## Prerequisites\r\nBasic Python and DataFrame experience (Polars or pandas); familiarity with ETL/ML pipelines helpful.\r\n\r\n## Key Takeaways\r\n- A unified, Polars-first workflow to access and normalize open weather data\r\n- Practical patterns for station discovery, timeseries retrieval, unit conversion, and caching\r\n- How to integrate Wetterdienst via Python, CLI, and REST, and export to common formats and databases\r\n\r\n## Links\r\n\ud83d\udce6 Repo https://github.com/earthobservations/wetterdienst\r\n\ud83d\udcd6 Docs https://wetterdienst.readthedocs.io/\r\n\ud83c\udf10 App https://wetterdienst.eobs.org/\r\n\ud83d\udca1 Examples 
https://github.com/earthobservations/wetterdienst/tree/main/examples", "recording_license": "", "do_not_record": false, "persons": [{"code": "MVPZYU", "name": "Benjamin", "avatar": "https://pretalx.com/media/avatars/MVPZYU_A1lvjvC.webp", "biography": "Benjamin Gutzmann is a 32 year old Python/data engineer and maintainer of Wetterdienst, currently at Otto Group data.works (Data Engineer since 2023; previously Junior Data Engineer), working across Generative AI and data engineering on GCP with Python, SQL, Argo, and Terraform. He has built the Wetterdienst library at earth observations (hobby project, since 2018). Before his start into work life he has studied Hydrology (BSc, MSc) at TU Dresden.", "public_name": "Benjamin", "guid": "c8e74cc1-afbf-572a-bdcb-1fcd2e7303e3", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/MVPZYU/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/V8DNCL/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/V8DNCL/", "attachments": [{"title": "marimo demo", "url": "/media/pyconde-pydata-2026/submissions/V8DNCL/resources/wetterd_kO2xP4M.py", "type": "related"}, {"title": "presentation", "url": "/media/pyconde-pydata-2026/submissions/V8DNCL/resources/wetter_9G06nPg.pdf", "type": "related"}]}, {"guid": "f15029b8-c8c9-52d1-82a7-53103183778d", "code": "PARU7X", "id": 85760, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-85760-from-struggling-to-mastery-a-practical-guide-to-data-pipeline-operations", "url": "https://pretalx.com/pyconde-pydata-2026/talk/PARU7X/", "title": "From Struggling to Mastery: A Practical Guide to Data Pipeline Operations", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "How mature are your data pipeline operations? 
A Roadmap to Operational Excellence.\r\n\r\nData teams often struggle to scale their pipeline operations, trapped in a cycle of manual fixes and reactive fire-fighting. But what does \"good\" actually look like? In this talk, we introduce a standardized 5-level maturity model for Data Operations, focusing on three critical pillars: Orchestration, Data Quality, and Data SLOs.\r\n\r\nWe will deconstruct the journey from \"Struggling\" (manual scripts, no guarantees) to \"Mastery\" (automated, resilient, and measured). Attendees will leave with a concrete framework to assess their team\u2019s current standing and a clear, step-by-step roadmap to raise the bar toward operational excellence.", "description": "The Problem: The \"it works on my machine\" trap. As data teams grow, ad-hoc processes that worked for a single engineer crumble under the weight of production requirements. Teams often know they need to improve, but they lack a unified definition of success. Without clear standards, it is impossible to measure progress.\r\n\r\nThis talk presents a comprehensive Operational Excellence Maturity Pyramid, designed to guide data teams from chaos to stability. We will explore a 5-level classification system (Struggling, Basic, Decent, Strong, and Mastery) applied across three foundational pillars of data engineering.\r\n\r\n1. Orchestration Maturity We will move beyond simple cron jobs and local scripts.\r\n\r\n- Struggling: Manual scheduling, no dependency management, lack of idempotency.\r\n\r\n- Mastery: Dynamic DAGs, event-driven triggers, automated backfills, modular infrastructure-as-code, and self-healing pipelines and more.\r\n\r\n\r\n2. Data Quality Maturity Data trust is hard to gain and easy to lose. 
We will define how to shift from reactive to proactive quality management.\r\n\r\n- Struggling: No testing program; quality issues are discovered by stakeholders downstream.\r\n\r\n- Mastery: Comprehensive coverage (Write-Audit-Publish patterns), automated anomaly detection, and \"circuit breakers\" that stop bad data before it hits the warehouse.\r\n\r\n3. Data SLOs (Service Level Objectives) Maturity You cannot improve what you do not measure.\r\n\r\nStruggling: Undefined targets; \"best effort\" delivery.\r\n\r\nMastery: Fully measurable SLIs (Service Level Indicators), defined Error Budgets, and automated alerting on burn rates.\r\n\r\n-- What You Will Learn: This session is not just theoretical; it is a practical guide for data engineers, platform leads, and managers. By the end of this talk, you will be able to:\r\n\r\n- Audit your current stack: Use the provided scorecard to classify your team's maturity level in each pillar.\r\n\r\n- Identify gaps: Understand exactly why you are stuck at the \"Basic\" or \"Decent\" levels.\r\n\r\n- Plan your roadmap: Walk away with actionable steps to advance to the next level, turning your data operations into a competitive advantage rather than a maintenance burden.", "recording_license": "", "do_not_record": false, "persons": [{"code": "BAZU7P", "name": "Akif Cakir", "avatar": "https://pretalx.com/media/avatars/BAZU7P_TJRrR9U.webp", "biography": "I am a Data and AI enthusiast with over 14 years of experience across the full data lifecycle \u2014 from ingestion and transformation to analytics and machine learning operations.\r\n\r\nMy expertise spans modern data architecture, ETL/ELT pipelines, Big Data technologies, and cloud-native solutions. 
I have deep hands-on experience designing and implementing end-to-end data and ML pipelines that are reliable, scalable, and cost-efficient, driving value through automation and operational excellence.\r\n\r\nI\u2019m passionate about leveraging data and AI to create impactful, efficient, and intelligent systems that empower both business and technology teams.", "public_name": "Akif Cakir", "guid": "f536190f-e3af-559a-adcf-96404ab53980", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BAZU7P/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/PARU7X/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/PARU7X/", "attachments": [{"title": "A Maturity Model for Data Pipeline Operations - PyCon26", "url": "/media/pyconde-pydata-2026/submissions/PARU7X/resources/A_Matu_a9BcMes.pdf", "type": "related"}]}, {"guid": "fc0cfa26-e848-5775-b7a3-f064ba6fc858", "code": "9MUDUY", "id": 87615, "logo": null, "date": "2026-04-15T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87615-empowering-data-scientists-with-zero-platform-friction-deploying-streamlit-friends-in-3-minutes", "url": "https://pretalx.com/pyconde-pydata-2026/talk/9MUDUY/", "title": "Empowering Data Scientists with Zero Platform Friction: Deploying Streamlit & Friends in 3 Minutes", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk (long)", "language": "en", "abstract": "A data scientist builds a Streamlit or Dash prototype, the business wants to validate it, and the hard parts begin: getting access to live data, making the app available company-wide, and ensuring every user only sees what they are allowed to see. 
Following \"best practices\" turns a simple demo into weeks of platform work, leaving data scientists frustrated and blocking them from shipping apps to end users.\r\n\r\nIn this talk we will **live-demo** Merck's self-service app service we have developed and hardened over multiple years. It lets **teams deploy Streamlit (and friends) in 3 minutes** while meeting best practices like SSO, CI/CD, and governed data access control. The platform has become essential for Merck to ship data apps at scale: in 2025 it powered **750+ active apps** reaching **8,000+ unique end users**.\r\n\r\n**Under the hood, we show:** how a use-case based access model enables scoped resource permissions so apps can safely access data on-behalf of the user. We also show starter templates that generate a deployable Git repo with example pages (e.g. Snowflake access or internal LLM chatbot). Finally, we cover the guardrails needed to operate this safely.\r\n\r\n**What you will learn:** a cost-effective reference architecture based on AWS that you can adapt to your hyperscaler or platform, practical patterns for balancing the trade-off between central control and decentral freedom, and how templates and CI/CD help teams iterate quickly without compromising security or reliability.", "description": "This session is for anyone who has built a Streamlit (or Dash, R Shiny, FastAPI, React) prototype and then hit the wall when it needed to be shared with real users: access to live data, SSO, permissioning, deployment, and operational guardrails.\r\n\r\nWe will present the workflow and the architecture from both sides: as a data scientist shipping an app, and as a platform admin operating the service safely at scale.\r\n\r\n## What we will demo\r\nWe will demo the end-to-end workflow from zero to a running app using our internal app service. 
The platform includes a web console for self-service provisioning and configuration and the deployment runtime managing the state of the application.\r\n\r\n- Using the web console to create and configure a new app from a framework template (Streamlit, Dash, R Shiny, FastAPI, React).\r\n- How a Git repository is created and the first version is deployed behind the scenes, including a working starter app with example pages.\r\n\r\n## Key design decisions (the parts that are usually hard)\r\n- Identity propagation: the app receives the signed-in user identity from SSO and uses it for downstream authorization.\r\n- Authorization at the data layer: dataset permissions are scoped to use-case resource, making sure tokens can not be exploited.\r\n- Safe multi-tenancy: per-app isolation plus resource limits to prevent noisy-neighbor problems.\r\n- Repeatable delivery: templates plus CI/CD conventions so a new app starts from a working, deployable baseline.\r\n- Day-2 operations: guardrails like quotas, rate limiting, and idle shutdown to keep the platform reliable and cheap.\r\n\r\n## Running at scale\r\n- Production usage: 750+ active apps and 8k+ unique end users (2025).\r\n- Infrastructure run rate under 10k USD per month (excluding engineering time).\r\n\r\n## Who should attend\r\n- Data scientists and analysts who want to ship apps beyond a demo.\r\n- Data platform and DevOps engineers building self-service tooling for governed environments.\r\n- Teams standardizing how internal data & AI products are delivered to business users.\r\n\r\n## Takeaways\r\n- For data scientists: what a good internal app hosting platform should provide, and which requirements you should ask your platform team for (governed on-behalf of data access, templates, CI/CD, guardrails).\r\n- For platform teams: a blueprint you can adapt beyond AWS, including the architecture and tradeoffs necessary to operate fine-grained authorization and a multi-tenant runtime at scale.\r\n\r\n**If you do not 
have such an app platform in your company yet, use this talk as a checklist to start the conversation with your IT or platform teams. :-)**", "recording_license": "", "do_not_record": false, "persons": [{"code": "GGSPQV", "name": "Bernhard Sch\u00e4fer", "avatar": "https://pretalx.com/media/avatars/GGSPQV_Xi0ejAz.webp", "biography": "Bernhard is a Senior Data Scientist at Merck with a PhD in deep learning and over 7 years of experience in applying data science and data engineering within different industries. For more information you can connect with him on LinkedIn. \ud83d\ude42", "public_name": "Bernhard Sch\u00e4fer", "guid": "9d1c14ab-4f68-59d3-9956-689c2c264de4", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GGSPQV/"}, {"code": "G7TJBW", "name": "Nicolas Renkamp", "avatar": "https://pretalx.com/media/avatars/G7TJBW_zb3ZHEB.webp", "biography": "As the Global Head of Platform Products Portfolio, Nicolas leads high performing teams that design, implement and maintain Merck's global data, analytics and AI ecosystem UPTIMIZE.", "public_name": "Nicolas Renkamp", "guid": "c0e002e2-fe89-5922-a079-55219e87981c", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/G7TJBW/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/9MUDUY/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/9MUDUY/", "attachments": [{"title": "Presentation Slides", "url": "/media/pyconde-pydata-2026/submissions/9MUDUY/resources/pydata_FtJyjpe.pdf", "type": "related"}]}, {"guid": "77e0be85-cba8-5f79-8d9b-a888fb943792", "code": "ZESFRG", "id": 88382, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-88382-learnings-building-devops-as-a-software-engineer", "url": "https://pretalx.com/pyconde-pydata-2026/talk/ZESFRG/", "title": "Learnings Building DevOps as a Software Engineer", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": 
"Talk", "language": "en", "abstract": "When I joined my current company as a software engineer, I encountered a blank slate: no CI/CD pipelines, no deployment infrastructure, barely any monitoring\u2014in short, no software infrastructure at all. This talk shares the key learnings from building a DevOps environment from the ground up. I\u2019ll walk through the essentials: which foundations were laid first, what tools and practices made the difference, and how automation became a daily habit. Through real-world examples, I will demonstrate how pragmatic and incremental steps can jump-start productivity, reduce manual toil, and help teams avoid common pitfalls.", "description": "What do you do when you join a company as a software engineer, and there\u2019s zero DevOps in place\u2014but product delivery can\u2019t wait? In this talk, I\u2019ll share firsthand insights from building core DevOps infrastructure from the ground up, while simultaneously delivering the first software products under tight deadlines.\r\nI\u2019ll outline the key priorities and quick wins that enabled rapid, reliable releases\u2014such as setting up basic CI/CD pipelines, introducing automated tests, and using containerization for reproducible deployments. Rather than aiming for \u201cperfect\u201d infrastructure from day one, I\u2019ll show how to build DevOps foundations incrementally and pragmatically, integrating automation step by step as part of everyday development work.\r\nThrough practical examples, I\u2019ll discuss how to achieve reliability without losing agility, how to avoid common pitfalls in \u201cbuild as you go\u201d DevOps, and how to balance product delivery with infrastructure improvements. 
Attendees will leave with actionable tips on how to bootstrap DevOps quickly, so teams can ship software confidently\u2014even when starting from scratch.", "recording_license": "", "do_not_record": false, "persons": [{"code": "RCAW7T", "name": "Gaweng Tan", "avatar": "https://pretalx.com/media/avatars/RCAW7T_fl0DrdO.webp", "biography": "I am a Software Architect at a manufacturing company, specializing in building reliable software products and establishing solid DevOps practices\u2014often from the ground up. My ongoing work with Python spans automation, scripting, and infrastructure, helping me to quickly deliver solutions even in \u201cgreenfield\u201d situations.\r\nCuriosity drives much of what I do\u2014I\u2019m always eager to understand how things work and love tackling technical challenges through hands-on experimentation. When I\u2019m not engineering or optimizing workflows, you\u2019ll find me exploring new recipes in the kitchen, running small coding side projects, or discovering the world in my own sometimes-cautious, adventure-seeking way.\r\nOutside of work, I enjoy deep conversations about technology and society, and occasionally share my thoughts and experiments on my personal blog. 
I like to think individuality and curiosity matter as much in tech as they do in everyday life.", "public_name": "Gaweng Tan", "guid": "c688a4d8-aa9b-57de-9828-a16fe26c0bd2", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/RCAW7T/"}], "links": [{"title": "Slides", "url": "https://slides.gtan.eu/slides/pycon-learnings-devops/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZESFRG/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZESFRG/", "attachments": []}, {"guid": "f655878e-853d-5be5-8442-10d22385bf91", "code": "ZLRFR9", "id": 87911, "logo": null, "date": "2026-04-15T15:00:00+02:00", "start": "15:00", "duration": "00:45", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87911-architecture-under-constraints-designing-systems-that-still-evolve", "url": "https://pretalx.com/pyconde-pydata-2026/talk/ZLRFR9/", "title": "Architecture Under Constraints: Designing Systems That Still Evolve", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk (long)", "language": "en", "abstract": "Most systems are built under constraints: legacy code, regulation, organizational boundaries, and long-term accountability. This talk explores how Staff+ engineers and tech leads can make sound architectural decisions when \u201cperfect\u201d isn\u2019t an option. Focusing on platforms and tooling, it presents practical ways to identify real constraints, preserve flexibility, avoid over-engineering, and communicate trade-offs that hold up over time - technically and organizationally.", "description": "Modern systems rarely exist in ideal conditions. They grow over years, integrate with legacy services, operate under regulatory or security constraints, and are shaped by organizational boundaries just as much as by code. 
Yet architectural guidance often assumes greenfield projects and unlimited freedom.\r\n\r\nThis talk focuses on architectural decision-making under real-world constraints, using  systems as the primary lens. Rather than discussing specific frameworks or patterns, it presents a practical way of thinking about architecture when trade-offs are unavoidable and decisions must hold up over time.\r\n\r\nDrawing from experience in regulated production environments, we will explore how to distinguish true constraints from accidental ones, how to think in terms of long-lived capabilities rather than short-lived components, and how to preserve optionality even when systems appear \u201clocked in.\u201d Examples will touch on Python-heavy platforms such as backend services, internal tools, data pipelines, and automation systems.\r\n\r\nThe session also addresses the human side of architecture: how Staff+ engineers and technical leaders communicate trade-offs, document decisions in a way that survives team changes, and align engineering, product, and compliance perspectives without over-engineering.\r\n\r\nThis talk is aimed at experienced engineers, tech leads, and engineering leaders who want to design systems that can evolve - even when constraints dominate the problem space.", "recording_license": "", "do_not_record": false, "persons": [{"code": "XFAALW", "name": "Eduard Thamm", "avatar": "https://pretalx.com/media/avatars/XFAALW_1ZY3mZ6.webp", "biography": "Eduard is a technical leader with a background in distributed systems, platform engineering, and security. He works as a Lead Engineer in regulated environments, designing and operating Kubernetes-based platforms where reliability, compliance, and developer experience must coexist. His work focuses on architecture under real-world constraints, supply-chain security, and building systems that remain adaptable over time. 
Eduard regularly advises engineering leaders on technical strategy and decision-making at scale, bridging hands-on experience with long-term architectural thinking.", "public_name": "Eduard Thamm", "guid": "0f192152-39f4-5fd9-91ae-529afb7c9647", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/XFAALW/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZLRFR9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/ZLRFR9/", "attachments": [{"title": "Slides as PDF", "url": "/media/pyconde-pydata-2026/submissions/ZLRFR9/resources/slides_A5GOlhF.pdf", "type": "related"}]}, {"guid": "ed63519b-068e-5ed5-a0d0-670e9ccad885", "code": "EXXWMV", "id": 86669, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86669-black-hole-stars-an-astronomical-mystery-mostly-solved-with-numpyro-and-jax", "url": "https://pretalx.com/pyconde-pydata-2026/talk/EXXWMV/", "title": "Black Hole Stars: An Astronomical Mystery (Mostly) Solved with NumPyro and JAX", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "The James Webb Space Telescope has revealed a mysterious population of \"Little Red Dots\": extremely distant objects that have upended our understanding of the early Universe. However, revealing the true nature of these marvels requires computationally-intensive statistical modeling of complex astronomical data. In this talk, we explore how we used JAX and NumPyro to help solve this puzzle. We will introduce these powerful Python tools, demonstrate how they accelerate complex statistical data analysis, and show how they provided evidence that Little Red Dots may in fact be \"Black Hole Stars.\"", "description": "The James Webb Space Telescope (JWST) has transformed extragalactic astronomy. 
In particular, it has uncovered a puzzling population of compact, red objects in the distant universe known as \"Little Red Dots\" (LRDs). These sources exhibit distinctive features not seen in typical galaxies and therefore their nature remains a subject of intense debate. Are they supermassive black holes hiding behind screens of dust? Massive dead galaxies appearing too early in the universe? Or something entirely new? To find out, we need to perform computationally heavy statistical analysis on the astronomical data. However, traditional tools have been too slow or made many assumptions to reduce the complexity of the JWST data that can lead to inaccurate results.  \r\n\r\nIn this talk, I will introduce the modern Python stack that now makes this possible: JAX and NumPyro. JAX allows you to write standard Python code that runs on GPUs and automatically computes derivatives, while NumPyro leverages that power for incredibly fast statistical modeling. We will start with the basics, using simple examples to demonstrate how JAX can speed up existing workflows and how NumPyro makes Bayesian inference accessible. \r\n\r\nThen, we will look at the \"Little Red Dot\" mystery as a case study. I will show how we built a custom inference engine (`unite`) to process thousands of JWST observations. By leveraging JAX's speed and NumPyro's flexibility, we were able to efficiently and accurately test complex physical models against the data, uncovering evidence that these unique objects may in fact be supermassive black holes embedded in dense gas clouds: essentially, stars powered not by fusion but by black holes.\r\n\r\nThis talk is for anyone interested in high-performance Python and especially for (data) scientists interested in modern scientific methods in designing scalable inference pipelines. 
You will leave with a solid introduction to JAX and NumPyro and an appreciation for how these tools are already helping solve the Universe's greatest mysteries.", "recording_license": "", "do_not_record": false, "persons": [{"code": "QHTKP7", "name": "Raphael Hviding", "avatar": "https://pretalx.com/media/avatars/QHTKP7_pundS4V.webp", "biography": "Dr. Raphael Hviding is an Astronomer working at the Max-Planck Institute for Astronomy. He is a member of the Data Science and Galaxies & Cosmology Departments. He works on problems related to complex data analysis from the world's frontier observatories as well as the applications of data science to solving astronomical mysteries. Originally from the USA, he obtained his PhD from Steward Observatory at the University of Arizona working on insights into Dust-Obscured Supermassive Black Holes from large Astronomical Surveys. \r\nHe now lives in Heidelberg with his wife and three cats, enjoys cycling, bouldering, and building computers.", "public_name": "Raphael Hviding", "guid": "10617cd9-a4f5-5195-83de-0a2370be323f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/QHTKP7/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/EXXWMV/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/EXXWMV/", "attachments": [{"title": "Slides (with reduced image quality to meet size requirements)", "url": "/media/pyconde-pydata-2026/submissions/EXXWMV/resources/BlackH_oTB1Lvu.pdf", "type": "related"}]}, {"guid": "73d611d3-da23-5af9-94bf-fdefae9c2379", "code": "LVRLSU", "id": 88284, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-88284-you-are-an-intelligent-business-analyst-how-i-learned-to-talk-to-business", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LVRLSU/", "title": "\"You are an intelligent business analyst\": how i learned to talk to business", "subtitle": "", "track": 
"General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "Developers don\u2019t need to become business analysts, but they do need business skills. This talk shows how learning to communicate with stakeholders, uncover real business needs, and bridge gaps between tech and business can dramatically increase your impact. Learn practical techniques to become a trusted technical partner and deliver solutions that truly matter.", "description": "I never planned to become a business analyst. In fact, I avoided it. I imagined endless meetings, unclear requirements, and conversations that had nothing to do with \u201creal\u201d technical work. I wanted to stay hands-on as a developer and data scientist.\r\n\r\nBut reality proved something important: you can't escape the business side if you want to build meaningful solutions. And once I learned how to talk to business stakeholders, everything changed: my impact, my influence, and the outcomes of the projects I worked on.\r\n\r\nIn this talk, we\u2019ll explore the practical business skills every developer needs but is rarely taught:\r\n\u2022 How to identify key stakeholders and understand what they really want\r\n\u2022 How to navigate communication in international, cross-functional teams\r\n\u2022 How to uncover business pain points before they become blockers\r\n\u2022 How to fix broken communication loops\r\n\u2022 How to become the go-to technical partner the business trusts\r\n\r\nBy the end, you won\u2019t just see yourself as a strong technical contributor: you\u2019ll see how to position yourself as an essential part of the broader business ecosystem, shaping better decisions and delivering solutions that truly matter.", "recording_license": "", "do_not_record": false, "persons": [{"code": "37RKBL", "name": "Darya Petrashka", "avatar": "https://pretalx.com/media/avatars/37RKBL_eXnT2cW.webp", "biography": "Darya Petrashka is a Senior Data Scientist at SLB with 6 years of experience, 
focusing on NLP and GenAI. She is passionate about using data for problem-solving, with a strong interest in AWS services. An AWS Community Builder, Darya actively shares her expertise through public speaking at various industry events, including AWS Community Days, Summits, and PyCons. A dedicated learner, Darya continually hones her skills by participating in workshops, courses, and tech schools.", "public_name": "Darya Petrashka", "guid": "0915c6fa-61c8-5ce2-86bc-7fa6780fae0a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/37RKBL/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/LVRLSU/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/LVRLSU/", "attachments": [{"title": "slides in the PDF format", "url": "/media/pyconde-pydata-2026/submissions/LVRLSU/resources/how_i__eqlrVop.pdf", "type": "related"}]}, {"guid": "54a08772-3dcf-58dc-b83e-30e155472fff", "code": "QB7VLW", "id": 88448, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-88448-ty-mypy-the-new-generation-of-python-type-checking", "url": "https://pretalx.com/pyconde-pydata-2026/talk/QB7VLW/", "title": "Ty mypy:  The New Generation of Python Type Checking", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Talk", "language": "en", "abstract": "Python\u2019s static typing ecosystem has long been shaped by mypy, but a new contender has entered the space: ty, a high-performance type checker from Astral that has recently exited alpha. With a focus on speed, modern ergonomics, and tight tooling integration, Ty represents a new direction for Python type checking.\r\n\r\nIn this talk, we\u2019ll explore what ty looks like in practice. We\u2019ll cover its core features, how it behaves on real-world codebases, and what changes when type checking becomes fast enough to run constantly. 
We\u2019ll also compare ty directly with mypy, highlighting strengths, limitations, and trade-offs teams should understand before adopting it.\r\n\r\nThis session will help Python developers evaluate whether ty is ready for production use today\u2014and what it suggests about the future of Python typing tools.", "description": "Static typing in Python has matured significantly over the past decade, with mypy becoming the de facto standard for many teams. At the same time, developers continue to struggle with slow feedback loops, noisy errors, and friction in CI and local workflows. ty, a new type checker from Astral.sh, aims to address these issues with a fundamentally different set of design priorities\u2014and it has now reached a post-alpha, production-ready stage.\r\n\r\nThis talk takes a practical, experience-based look at ty from the perspective of a Python developer using it on real code. We\u2019ll start by briefly reviewing the current state of Python type checking and the problems that motivated ty\u2019s design. From there, we\u2019ll dive into ty\u2019s feature set, performance characteristics, and developer experience, focusing on what actually changes when type checking becomes fast and ergonomic enough to feel \u201calways on.\u201d\r\n\r\nA central part of the talk will be a direct comparison with mypy: where ty already excels, where it behaves differently, and where mypy remains the better choice today. Rather than framing this as a replacement story, we\u2019ll explore the trade-offs between the two tools and what kinds of teams benefit most from each.\r\n\r\nBy the end of the session, attendees will have a clear mental model of how ty works, how mature it is today, and whether it\u2019s a good fit for their own projects. 
More broadly, we\u2019ll look at what ty signals about the future direction of Python\u2019s typing ecosystem.", "recording_license": "", "do_not_record": false, "persons": [{"code": "KGBECS", "name": "Stefan Kraus", "avatar": "https://pretalx.com/media/avatars/KGBECS_349mw3C.webp", "biography": "Data Engineer / ML Engineer at inovex GmbH.\r\nI\u2019m passionate about building innovative and impactful digital solutions and sharing practical insights that create sustainable value for customers and teams.", "public_name": "Stefan Kraus", "guid": "44ecd550-fd30-5a1e-8d5e-84e5f7ac026f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/KGBECS/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/QB7VLW/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/QB7VLW/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/QB7VLW/resources/ty_myp_bgutOrP.pdf", "type": "related"}]}], "Helium [3rd Floor]": [{"guid": "732bab5f-1c73-55d0-b593-e0c8e4379646", "code": "93SXWY", "id": 86058, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-86058-ship-data-with-confidence-declarative-validation-for-pyspark-pandas", "url": "https://pretalx.com/pyconde-pydata-2026/talk/93SXWY/", "title": "Ship Data with Confidence: Declarative Validation for PySpark & Pandas", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "Tired of data quality issues crashing your PySpark and Pandas pipelines? This talk introduces [dataframe-expectations](https://github.com/getyourguide/dataframe-expectations), a lightweight, open-source library for declarative data validation. 
We will dive into the library's design and demonstrate how to easily define and apply data quality expectations to catch errors early, reduce debugging time, and ship more reliable data products, faster. Learn to build more robust data pipelines and move from reactive problem-solving to proactive data validation.", "description": "This session introduces a practical, open-source solution to a critical challenge facing data engineers and scientists: how to proactively guarantee data quality. In today's fast-paced development cycles, data pipelines are increasingly complex and reliant on numerous upstream sources, elevating the risk of data quality issues that have the potential to cause production failures. While monitoring and alerting systems are essential for flagging these failures, they are fundamentally reactive; their value is entirely dependent on the quality and coverage of the underlying validation logic that engineers must build and maintain. The true goal is to shift from reactive clean-up to proactive prevention. This talk demonstrates a more effective approach: stopping bad data from ever reaching production by embedding clear, declarative validation directly into your data pipelines. This provides immediate visibility into errors, allowing you to catch and fix data quality issues at the earliest possible stage of development.\r\n\r\n[dataframe-expectations](https://github.com/getyourguide/dataframe-expectations) is an attempt to address this problem through a lightweight, open-source Python library designed for declarative data validation in both PySpark and Pandas. This session will explore the key design choices behind its implementation and architecture, including its lightweight nature, which ensures the library doesn't become a bottleneck by impacting CI/CD run times or bloating container image sizes, making it ideal for data pipelines, unit tests and end-to-end tests alike. 
Through examples, we will walk through its fluent, chainable API and showcase its extensive list of reusable, parameterized expectations. We will then dive into advanced features, including powerful decorator-based validation that seamlessly integrates quality checks into your existing code, and a flexible tag-based filtering system that allows you to dynamically decide which expectations to run at runtime.\r\n\r\nAttendees will leave with a clear, actionable strategy for integrating declarative data quality checks into their pipelines, understanding how a simple, extensible tool can dramatically increase the reliability of their data products and, ultimately, their development velocity.", "recording_license": "", "do_not_record": false, "persons": [{"code": "7LMGT3", "name": "Ryan Sequeira", "avatar": "https://pretalx.com/media/avatars/7LMGT3_v5y5MlX.webp", "biography": "As a Data Scientist on the Traveler Data Products team at GetYourGuide, I have spent the last 4 years developing and refining the ranking and relevance systems that power one of the world's leading travel experience platforms. My work is focused on enhancing the traveler's journey, helping millions discover and book their ideal experiences through data-driven solutions.\r\n\r\nMy path to data science is built on a foundation of diverse technical experience. I began my career in 2013 as a backend developer in Pune, India, before pursuing a Master's in Computer Science at the Indian Institute of Technology Patna, where I specialised in Network Science. Following my studies, I continued at the institute for two years as a research assistant, further honing my expertise in Network Science, which paved my way into the field of data science.\r\n\r\nIn 2021, I relocated to Berlin to join GetYourGuide, where I apply my software engineering background and machine learning skills to solve real-world problems at scale. 
This blend of backend development experience, academic research, and industry application gives me a unique perspective on building robust, production-ready data solutions.", "public_name": "Ryan Sequeira", "guid": "d5dad103-43b4-5d69-b617-be51a8c8ed9c", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/7LMGT3/"}], "links": [{"title": "Presentation", "url": "https://docs.google.com/presentation/d/1UiGjgexfaEfXyjIy02cbY_T7QBpVJ189SpHbxb_fvOU", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/93SXWY/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/93SXWY/", "attachments": []}, {"guid": "cc87bd38-c291-549c-9c41-81f150e69bce", "code": "B8KVNJ", "id": 87640, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87640-accuracy-is-overrated-ship-stable-forecasts-without-lying-to-yourself", "url": "https://pretalx.com/pyconde-pydata-2026/talk/B8KVNJ/", "title": "Accuracy Is Overrated: Ship Stable Forecasts (Without Lying to Yourself)", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "Forecasting talks love a clean ending: \u201cand then we improved WMAPE by 3.7%.\u201d\r\nNice. Now put that model into production without suffering from instability.\r\n\r\nYou retrain your model on a few new weeks of data and suddenly the one-year forecast jumps 15\u201320%. Planning teams redo decisions, trust erodes, and your \u201caccurate\u201d model becomes unusable. This talk is about forecast stability: how much forecasts change when you add new data and rerun the same pipeline.\r\n\r\nWe run a simple experiment: train a model, forecast one year ahead, add recent data, retrain, and measure forecast-to-forecast change. 
We repeat this across common forecasting approaches including ETS/ARIMA, Prophet, XGBoost with lag features, AutoGluon ensembles, neural/global models, and TimeGPT-style APIs.\r\n\r\nYou will see that high accuracy does not guarantee usable forecasts, and that some models are systematically more volatile than others. We then cover practical ways to stabilise forecasts without freezing them, focusing on reconciliation and ensembling (including origin ensembling).\r\n\r\nThis talk is for forecasting practitioners who want models users actually trust, not just good metrics.", "description": "Forecasting talks love a clean ending: \u201cand then we improved WMAPE by 3.7%.\u201d\r\nNice. Now put that model into production without suffering from instability.\r\n\r\nBecause here is what users actually see: the forecast changes every week. The \u201cone-year view\u201d jumps 15 to 20 percent because you retrained on three extra Mondays. Planning teams redo decisions. Operations loses trust. Your model becomes an expensive random-number generator with excellent dashboards.\r\n\r\nThis talk is about forecast stability: how much your future forecast moves when you add a small amount of new data, retrain, and run the same pipeline again. Not error versus actuals. 
Forecast versus forecast.\r\n\r\nYou will see a simple but uncomfortable experiment: \r\n\r\n- Taking a demand-style time series dataset with seasonality, promotions, and noise (Kaggle competition style).\r\n- Training a model and producing a one-year-ahead forecast.\r\n- Adding a few recent weeks of data, retraining, and forecasting again.\r\n- Measuring how much the overlapping horizon changed.\r\n\r\nWe repeat this across model families people actually use:\r\n\r\n- Statistical baselines like ETS and ARIMA\r\n- Prophet\r\n- Feature-based ML with lag features such as XGBoost\r\n- AutoML and ensembles with AutoGluon TimeSeries\r\n- Neural and global models where relevant\r\n- And yes, what happens when you add an API model like TimeGPT into the mix (no hype, just behaviour under updates)\r\n\r\nYou will see something totally \"unexpected\": a model can be \u201caccurate\u201d and still be operationally useless because its forecast revisions are chaotic. And you will see the opposite too: models with slightly worse headline accuracy that people actually trust, because next year does not get rewritten every week.\r\n\r\nThis is not a philosophical debate. It is a measurable property of forecasting systems that most teams never track.\r\n\r\nSo what do we do about it?\r\nWe focus on techniques that improve stability without turning forecasts into fossils:\r\n\r\n1) Reconciliation\r\nHierarchical and temporal reconciliation as a stabiliser, not just a coherence tool. If SKU-level forecasts panic while higher-level signals stay calm, reconciliation can prevent nonsense from propagating into decisions.\r\n\r\n2) Ensembling and origin ensembling\r\nCombining models is not only about accuracy. 
Averaging forecasts across models and across forecast origins dampens noise and makes forecast updates behave like signals instead of mood swings.\r\n\r\nWho this talk is for:\r\n\r\nForecasting practitioners, data scientists working on demand forecasting, and anyone who has ever heard: \u201cYour model looks good, but I don\u2019t trust it.\u201d\r\n\r\nWhat you\u2019ll take away:\r\n\r\n- A methodology to measure forecast stability using forecast-to-forecast change.\r\n- A mental model for when forecast revisions are useful and when they are just noise.\r\n- Practical patterns you can implement immediately in Python to make forecasts calmer without hiding real change.\r\n\r\nIf you optimise only accuracy metrics, you are grading homework.\r\nIf you care about stability, you are building a forecasting product.", "recording_license": "", "do_not_record": false, "persons": [{"code": "HZFYWE", "name": "Illia Babounikau", "avatar": null, "biography": "Dr. Illia Babounikau is an accomplished data scientist with extensive expertise in machine learning and forecasting. He holds a Ph.D. in Physics from Hamburg University and initially pursued an academic career, focusing on large-scale data analysis and machine learning applications. His contributions have been instrumental in international scientific collaborations, including the CMS experiment at CERN\u2019s Large Hadron Collider and the COMET project at J-PARC.\r\n\r\nFor the past five years, Dr. Babounikau has been a Data Scientist at Blue Yonder and VOIDS, specializing in developing and fine-tuning advanced forecasting models for retail planning and inventory management. He leads the design and implementation of tailored machine-learning solutions, addressing complex challenges within supply chains across diverse industries.\r\n\r\nDr. 
Babounikau is passionate about bridging the gap between data science and business strategy, ensuring machine learning models are aligned with business objectives to drive data-informed decision-making.", "public_name": "Illia Babounikau", "guid": "7c7d8c47-e365-583f-aad8-cae3ae6b6a51", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HZFYWE/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/B8KVNJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/B8KVNJ/", "attachments": [{"title": "talk", "url": "/media/pyconde-pydata-2026/submissions/B8KVNJ/resources/pycon__L0FZSqd.pdf", "type": "related"}]}, {"guid": "596719ff-d231-534f-969e-d7d1bd4f0efc", "code": "Q9DU8N", "id": 85593, "logo": null, "date": "2026-04-15T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-85593-causal-inference-through-the-lens-of-probabilistic-programming", "url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9DU8N/", "title": "Causal Inference through the lens of probabilistic programming", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk (long)", "language": "en", "abstract": "Causal inference asks the hardest question in data science: \"What would have happened if things were different?\" While traditional methods often rely on rigid rules, statistical tests or \"black box\" adjustments, Probabilistic Programming Languages (PPLs) like PyMC and NumPyro offer a transparent, flexible, and powerful lens to view these problems.\r\n\r\nIn this talk, we move beyond the standard \"correlation is not causation\" disclaimer. We will build a unified workflow that starts with robust A/B testing, moves to bias adjustment in observational data using multilevel models, and culminates with advanced Deep Causal Latent Variable Models (CEVAE).", "description": "Why should you use a Probabilistic Programming Language (PPL) for Causal Inference? 
Because causal problems are inherently about uncertainty and structure\u2014two things PPLs handle natively.\r\n\r\nIn this session, we will demonstrate how to translate causal diagrams (DAGs) directly into code, using PyMC and NumPyro to estimate causal effects with rigorous uncertainty quantification. We will cover three distinct levels of complexity, drawing on real-world examples and recent research:\r\n\r\n1. The \"Simple\" Case: Enhancing A/B Tests Even in randomized experiments, PPLs provide massive value. We will show how to:\r\n\r\n    - Use Prior Predictive Checks to prevent \"silly\" estimates (Twyman's Law) by incorporating domain knowledge into priors (e.g., preventing the model from predicting a 1000% lift). We also describe how to perform a *power* analysis in a Bayesian framework.\r\n\r\n    - Implement Bayesian CUPED to reduce variance and increase statistical power without collecting more data. We can combine these variance-reduction methods with smarter priors as described above.\r\n\r\n2. The Observational Challenge: Confounding & Structure When we can't randomize, we must adjust. We will explore (through concrete examples):\r\n\r\n    - Backdoor Adjustment: Show how PPLs implement the \"do-operator\" to estimate Average Treatment Effects (ATE) in the presence of observed confounders.\r\n\r\n    - Multilevel Causal Models: Demonstrate how to use multilevel models to account for time-invariant unobserved confounders. We discuss the pros and cons compared with similar methods, such as fixed effects. \r\n\r\n3. The Frontier: Deep Latent Variable Models: What if confounders are unobserved? 
We will introduce advanced methods combining Deep Learning with Probabilistic Programming:\r\n\r\n    - An introduction to the Causal Effect Variational Autoencoder (CEVAE).\r\n\r\nBy the end of this talk, you will understand how to view causal inference not as a collection of isolated statistical tricks, but as a coherent modeling process powered by probabilistic programming.\r\n\r\n### References\r\n\r\n* **A/B Testing & Priors:** [Prior Predictive Checks for Metric Lift](https://juanitorduz.github.io/prior_predictive_ab_testing/) & [Power Analysis](https://juanitorduz.github.io/power_sample_size_exclude_null/)\r\n* **Variance Reduction:** [Bayesian CUPED](https://juanitorduz.github.io/bayesian_cuped/)\r\n* **Observational Data:** [Introduction to Causal Inference with PyMC](https://juanitorduz.github.io/intro_causal_inference_ppl_pymc/)\r\n* **Hierarchical Models:** [Multilevel Causal Inference](https://juanitorduz.github.io/ci_multilevel/)\r\n* **CEVAE Paper:** Louizos, C., Shalit, U., Mooij, J., Sontag, D., Zemel, R., & Welling, M. (2017). [Causal Effect Inference with Deep Latent-Variable Models](https://arxiv.org/abs/1705.08821).\r\n* **Code Reference:** Adapting concepts from *CausalML* (Robert Osazuwa Ness), specifically [Chapter 11: Bayesian Causal Graphical Inference](https://github.com/altdeep/causalML/blob/master/book/chapter%2011/Chapter_11_Bayesian_Causal_Graphical_Inference.ipynb).", "recording_license": "", "do_not_record": false, "persons": [{"code": "ADJDMC", "name": "Dr. Juan Orduz", "avatar": "https://pretalx.com/media/avatars/ADJDMC_OcS7UZk.webp", "biography": "Mathematician (Ph.D., Humboldt Universit\u00e4t zu Berlin) and data scientist. I am interested in interdisciplinary applications of mathematical methods, particularly time series analysis, Bayesian methods, and causal inference. Active open source developer (PyMC, PyMC-Marketing, and NumPyro, among others). 
For more info, please visit my personal website https://juanitorduz.github.io", "public_name": "Dr. Juan Orduz", "guid": "bce64d59-705c-50b3-a6de-415e9b8a5ad1", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ADJDMC/"}], "links": [{"title": "slides", "url": "https://juanitorduz.github.io/html/causal_inference_ppl.html", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9DU8N/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9DU8N/", "attachments": []}, {"guid": "5f32ff9d-8893-5e2e-ae8b-5572d68a51dc", "code": "AZ7GD3", "id": 87300, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87300-small-language-models-for-tool-calling-are-better-than-you-think", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AZ7GD3/", "title": "Small Language Models for Tool Calling Are Better Than You Think", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "Large language models have been widely used in tool-calling workflows thanks to their strong performance in generating appropriate function calls. However, due to their size and cost, they are inaccessible to small-scale builders, and server-side computing makes data privacy challenging. Small language models (SLMs) are a promising, affordable alternative that can run on local hardware, ensuring higher privacy.\r\n\r\nUnfortunately, SLMs struggle with this task - they pass wrong arguments when calling functions with many parameters, and make mistakes when the conversation spans multiple turns. On the other hand, for production applications with specific API sets, we often don't need general-purpose LLMs - we need reliable, specialized models.\r\n\r\nThis talk demonstrates how to increase the accuracy of SLMs (under 8B parameters) for custom tool calling tasks. 
We will share how leveraging knowledge distillation helps to get the most out of SLMs in low-data settings - they can even outperform LLMs! We will present the whole pipeline from data generation, fine-tuning, and local deployment.", "description": "Large language models have been widely used in tool-calling workflows thanks to their strong performance in generating appropriate function calls. However, due to their size and cost, they are inaccessible to small-scale builders, and server-side computing makes data privacy challenging. Small language models (SLMs) are a promising, affordable alternative that can run on local hardware, ensuring higher privacy.\r\n\r\nUnfortunately, SLMs struggle with this task - they pass wrong arguments when calling functions with many parameters, and make mistakes when the conversation spans multiple turns. On the other hand, for production applications with specific API sets, we often don't need general-purpose LLMs - we need reliable, specialized models.\r\n\r\nThis talk demonstrates how to increase the accuracy of SLMs (under 8B parameters) for custom tool calling tasks. We will share how leveraging knowledge distillation helps to get the most out of SLMs in low-data settings - they can even outperform LLMs! We will present the whole pipeline from data generation, fine-tuning, and local deployment.\r\n\r\n**What you'll learn:**\r\n\r\n1. **Tool calling:** Different tool calling settings (single and multi-turn)\r\n2. **Distillation**: Using large models as teachers to train specialized, compact models that maintain reliability with lower computational cost.\r\n3. 
**Tool calling data generation:** Challenges in generating diverse tool calling data.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GULAGN", "name": "Gabi Kadlecova", "avatar": "https://pretalx.com/media/avatars/GULAGN_zEPBZyR.webp", "biography": "I am a Machine Learning Researcher at distil labs, where I work on knowledge distillation and tool calling for small language models. I did my PhD at Charles University in Prague, focusing on Neural Architecture Search and surrogate models.\r\nI believe not every problem needs a large and complex model. Both during my PhD and at distil labs, I have been exploring how small models fare compared to state of the art. I enjoy analyzing the problem first - understanding the limitations of both small and large models is what helps us really solve it.", "public_name": "Gabi Kadlecova", "guid": "b6c85ed7-7a03-5b84-939f-a5eb587c8928", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GULAGN/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AZ7GD3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AZ7GD3/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/AZ7GD3/resources/Pydata_E3VQykS.pdf", "type": "related"}]}, {"guid": "a2e2b81f-45bf-5b4d-9eb6-f89990f41550", "code": "M33SNJ", "id": 87890, "logo": null, "date": "2026-04-15T15:00:00+02:00", "start": "15:00", "duration": "00:45", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87890-building-non-biased-synthetic-datasets-what-actually-works-and-what-fails", "url": "https://pretalx.com/pyconde-pydata-2026/talk/M33SNJ/", "title": "Building Non-Biased Synthetic Datasets: What Actually Works (and What Fails)", "subtitle": "", "track": "PyData: Generative AI & Synthetic Data", "type": "Talk (long)", "language": "en", "abstract": "Synthetic data is often presented as an easy fix for missing or sensitive datasets, but in practice, it can silently 
introduce bias, leakage, and misleading evaluation results. This talk presents a practical, end-to-end pipeline for creating synthetic datasets that are reproducible, task-aligned, and bias-aware. We will walk through design decisions that matter: template-based generation vs. free-form generation, entity balancing, controlling distributional skew, filtering failure cases, and validating dataset quality before training any model. The session emphasizes what actually works in real pipelines, common failure modes that look fine at first glance, and concrete best practices for Python developers to apply when building synthetic datasets for machine learning, NLP, or evaluation.", "description": "This talk focuses on the engineering side of synthetic dataset creation, treating data as a first-class artifact rather than a byproduct of modeling. It presents a concrete, reusable pipeline for building synthetic datasets that are reproducible, bias-aware, and suitable for evaluation.\r\n\r\n1. Why Synthetic Data Is Not Automatically \u201cSafe\u201d\r\nWe begin by examining common assumptions about synthetic data. While synthetic datasets avoid privacy issues, they often introduce hidden bias, distribution collapse, or label leakage. This section highlights real-world failure modes and explains why many synthetic datasets perform well in benchmarks but fail in practice.\r\n\r\n2. What are the Main Properties of Synthetic Data\r\n\t\t1. Simulated Data\r\n\t\t2. Anonymized\r\n\t\t3. Not Copied\r\n\t\t4. Compliant\r\n\t\t5. It is based on statistical property of real data.\r\n\r\n3. Defining the Task Before Generating Any Data\r\nA dataset pipeline must start with a clear task definition. We discuss how ambiguous task definitions lead to incoherent data and misleading results, and how to formally specify label semantics, constraints, and negative space before generation begins.\r\n\r\n4. Template-Based vs. 
Free-Form Generation\r\nThis section compares controlled template-based generation with unconstrained LLM prompting. We show why decomposing generation into templates, placeholders, and curated value lists dramatically improves consistency, debuggability, and bias control.\r\n\r\n5. Bias Control by Construction\r\nRather than detecting bias after the fact, we show how to prevent it during generation. Topics include balanced entity lists, randomized substitution, avoiding demographic collapse, and preventing unintended correlations between labels and surface patterns.\r\n\r\n6. Pipeline Architecture and Tooling\r\nWe walk through a practical Python-based pipeline, covering modular generation stages, deterministic sampling, versioning, and reproducibility. Emphasis is placed on making dataset generation repeatable and auditable, just like code.\r\n\r\n7. Filtering, Validation, and Quality Gates\r\nSynthetic data must be filtered aggressively. This section covers structural validation, label consistency checks, distributional sanity checks, and lightweight heuristics that catch most generation errors before model training.\r\n\r\n8. Measuring Dataset Difficulty and Coverage\r\nWe discuss simple, task-agnostic ways to estimate dataset diversity and difficulty, ensuring that synthetic data does not collapse into trivially easy examples or overly clean language.\r\n\r\n9. What Did Not Work (and Why)\r\nThis section summarizes failed approaches, including direct JSON generation, inline annotation, and large one-shot prompts. Understanding these failures helps avoid repeating common mistakes.\r\n\r\n10. 
When Synthetic Data Is the Right Tool and When It Is Not\r\nWe close with guidance on appropriate use cases for synthetic datasets, their limitations, and how they should complement, not replace, real data and human evaluation.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NCHBH9", "name": "Shiva Banasaz Nouri", "avatar": "https://pretalx.com/media/avatars/NCHBH9_kC7BZeN.webp", "biography": "Shiva Banasaz Nouri is a Senior Data Scientist based in Berlin, Germany, working on applied machine learning with a focus on Python, NLP, computer vision, and generative AI. She builds production-grade AI systems across healthcare, legal, and enterprise domains using open-source technologies.\r\n\r\nShe is the Berlin Chapter Lead of Women in AI, where she actively fosters community building, knowledge sharing, and inclusive participation in the AI and Python ecosystems.", "public_name": "Shiva Banasaz Nouri", "guid": "5ea3de73-4882-50e9-92f6-5244e157f97d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NCHBH9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/M33SNJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/M33SNJ/", "attachments": []}, {"guid": "28715488-4694-50a2-a45e-031cb4ec60bd", "code": "39MHWT", "id": 87261, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87261-from-ticket-to-draft-how-munich-automates-citizen-inquiries-with-ai", "url": "https://pretalx.com/pyconde-pydata-2026/talk/39MHWT/", "title": "From Ticket to Draft: How Munich Automates Citizen Inquiries with AI", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. 
Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "The City of Munich is modernizing its communication: With the transition to the Zammad ticketing system, there is a unique opportunity to not only manage citizen inquiries but to proactively process them using Artificial Intelligence. The Zammad-AI project utilizes a two-stage process consisting of intelligent classification and RAG-based (Retrieval-Augmented Generation) response drafting to significantly reduce the workload of administrative staff.\r\nIn this talk, we demonstrate how we integrated Zammad-AI via an internal Kafka message bus to process tickets in real-time. We explore the technical workflow\u2014from thematic context analysis to the generation of valid response drafts based on a department-specific knowledge base.", "description": "## 3. Session Outline (30 Minutes)\r\n\r\n### I. Context & The Pre-Study | **5 min**\r\n* **The Shift:** Transitioning from legacy email communication to **Zammad** within Munich's city administration.\r\n* **Proving the Case:** Utilizing LLMs to analyze historical ticket data to calculate automation potential and project significant time savings before development began.\r\n\r\n### II. Architecture: Integration & Pipeline | **6 min**\r\n* **Event-Driven Design:** Connecting to Zammad via the city-internal **Kafka** message bus.\r\n* **Real-time Processing:** How new tickets are captured and routed to the AI component seamlessly.\r\n\r\n### III. The Two-Stage Process | **12 min**\r\n* **Step 1: Classification & Extraction:** Analyzing thematic context through rule-based logic and LLM-powered information extraction.\r\n* **Step 2: Response Generation:** A **RAG (Retrieval-Augmented Generation)** approach leveraging a knowledge base maintained by subject matter experts.\r\n* **Human-in-the-Loop:** Integrating response drafts into the agent UI for review vs. automated **\"dark processing\"** for high-confidence categories.\r\n\r\n### IV. 
Scaling & Lessons Learned | **4 min**\r\n* **Multi-Tenant Capability:** Designing for configurability and deployment across various city departments.\r\n* **Key Benefits:** Efficiency gains, response consistency, and establishing a \"Single Voice of the City.\"\r\n\r\n### V. Q&A | **3 min**\r\n* Open discussion on technical tooling, model selection, and legal/privacy frameworks.\r\n\r\n---\r\n\r\n## 4. Key Takeaways for Attendees\r\n\r\n* **Validating Automation:** Techniques for using LLMs to audit historical data and justify development through projected time savings.\r\n* **Practical AI Integration:** How to integrate AI services into existing enterprise infrastructures like Zammad and Kafka.\r\n* **Modular Workflow:** The importance of separating classification from generation for higher system reliability.\r\n* **Operational Insights:** Lessons from scaling AI solutions across diverse governmental branches.", "recording_license": "", "do_not_record": false, "persons": [{"code": "9FM8HN", "name": "Leon Lukas", "avatar": "https://pretalx.com/media/avatars/9FM8HN_6OvE2io.webp", "biography": "Leon Lukas has been the team lead of the AI Competence Center for two years and has played a key role in the development and implementation of AI solutions within the city administration. While he initially trained models and built systems himself, he is now responsible for the architecture and projects at it@m, the city\u2019s IT service provider. 
For more information on AI in the City of Munich, visit: ki.muenchen.de.", "public_name": "Leon Lukas", "guid": "4841ad24-e364-5f37-b2d9-4d5fdefeeebb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/9FM8HN/"}], "links": [{"title": "Github", "url": "https://it-at-m.github.io/zammad-ai/", "type": "related"}, {"title": "More Info on AI in Munich", "url": "https://ki.muenchen.de/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/39MHWT/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/39MHWT/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/39MHWT/resources/From_T_J7KvuFf.pdf", "type": "related"}]}, {"guid": "b26f295f-9705-5825-8a01-edc6212df117", "code": "CVPVPK", "id": 86889, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-86889-beyond-vibe-coding-a-practitioner-s-guide-to-spec-driven-development-in-ai-engineering", "url": "https://pretalx.com/pyconde-pydata-2026/talk/CVPVPK/", "title": "Beyond Vibe-Coding: A Practitioner's Guide to Spec-Driven Development in AI Engineering", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "AI-assisted coding became the default. Tools like GitHub Copilot, Cursor, and Claude can generate hundreds of lines of Python in seconds. However, the real challenge isn't how fast we generate code \u2014 it's how we ensure that generated code actually represents our intent, follows best practices, and integrates cleanly into existing systems.\r\n\r\nIn this talk, I present Spec-Driven Development (SDD), a way to engineer the context in which AI writes code. 
Using a realistic example from my work building production-grade retrieval-augmented generation systems, I show how specifications can become a practical way to interact with AI coding tools \u2014 grounded in a concrete use case, from spec to implementation.", "description": "AI Engineering is fundamentally about system building. It is the transition from demos to production-grade Python systems that must be scalable, reliable, and testable. In my experience, one way to achieve this consistently with AI-generated code is to stop coding first \u2014 and start specifying first.\r\n\r\nSpec-Driven Development is a practical methodology for AI-assisted development. It is not about heavy bureaucracy; it's about creating a \"Single Source of Truth\" that both humans and AI agents can rely on.\r\n\r\nIn this talk, I will walk through a realistic feature in a production-grade retrieval-augmented generation system. I will demonstrate how I used SpecKit \u2014 one example of a structured spec workflow, usable with different AI coding assistants \u2014 to move from a feature request to a reviewable spec, a research document, interface contracts, and a phased task plan \u2014 all before writing a single line of implementation code.\r\n\r\n**What You Will Learn:**\r\n\r\n- *What is Spec-Driven Development?*\r\n- *The Paradigm Shift:* Why \"specifying\" may be the new \"coding\" in a world of Large Language Models.\r\n- *How to use SpecKit as one example of a structured spec workflow* \u2014 usable with different AI coding assistants.", "recording_license": "", "do_not_record": false, "persons": [{"code": "BZLFAX", "name": "Alina Dallmann", "avatar": "https://pretalx.com/media/avatars/BZLFAX_nUrQXZG.webp", "biography": "Alina Dallmann is an AI Engineer at scieneers GmbH. As a computer scientist, she combines her passion for classical software engineering with modern, data-driven projects. 
Most recently, her focus has been on building production-ready Retrieval-Augmented Generation (RAG) systems.", "public_name": "Alina Dallmann", "guid": "5e27c97e-56ee-577a-b74b-45b1cd11dded", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BZLFAX/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/CVPVPK/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/CVPVPK/", "attachments": [{"title": "Slidedeck", "url": "/media/pyconde-pydata-2026/submissions/CVPVPK/resources/2026-0_CDnmkwZ.pdf", "type": "related"}]}, {"guid": "8faa2c86-044b-5342-ad9b-ef8c967e6614", "code": "7PNT37", "id": 88399, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-88399-to-nest-or-not-to-nest-nested-data-types-in-polars-with-big-data", "url": "https://pretalx.com/pyconde-pydata-2026/talk/7PNT37/", "title": "To nest, or not to nest? Nested data types in Polars with big data", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "Do you find yourself weighing up the pros and cons of using nested types in the Polars library - pondering whether you should encode your variables in structures using lists, arrays or opt for a flat format without complex hierarchy? This talk focuses on the crucial design choices available, the performance implications, and how this impacts the logic of your queries, as well as code readability, when deciding how to implement your big data pipeline in Polars. The methods available for nested types in Polars have seen some significant additions over the last year, with powerful functionality, such as filtering and aggregation, released in the latest versions of the library. These provide much-needed shortcuts for queries interrogating complex nested structures that previously required sophisticated user-defined functions. 
It makes the use of nested types much easier and intuitive, but does this mean you should nest your data? Through practical examples you\u2019ll learn some guidelines to help you decide.", "description": "If you\u2019ve ever designed or used SQL databases in your data science projects perhaps you\u2019ve cringed at the lack of relational structure and data duplication in the design of big data storage and processing. On the other hand, if you\u2019ve spent any considerable time getting dirty with Polars\u2019 vectorized and columnar processing, you\u2019ll also know that this can be somewhat of a moot point. So why bother?\r\n\r\nOutline of the talk:\r\n\r\n5 minutes: Introduction & origin story. What are Polars nested types? How do they work? Why do they matter?\r\n5 minutes: Back to the future. Advanced queries on nested types, past & present.\r\n5 minutes: Query structure - \u201cGroup by\u201d forever baby, versus element-wise.\r\n5 minutes: Storage comparison and the gigabyte scrooge - how a miser decides on a nested Polars structure.\r\n5 minutes: Time is money \u2013 How performance stacks up.\r\n5 minutes: Q&A\r\n\r\nBy the end of the talk, participants will have seen several straightforward examples, as well as more advanced illustrations of nested structures in Polars using real-world data. They will be able to identify some key considerations informing their use of nested structures, including query logic, storage and performance.", "recording_license": "", "do_not_record": false, "persons": [{"code": "3US9H9", "name": "Daniel Finnan", "avatar": "https://pretalx.com/media/avatars/3US9H9_xy7ZuqY.webp", "biography": "Daniel Finnan is a 2nd year PhD candidate at the Lirsa laboratory, Conservatoire national des arts et m\u00e9tiers (CNAM), in Paris. His thesis focuses on decentralized finance, specifically decentralized exchanges, applying a quantitative methodology using blockchain data, techniques in data science, and time series econometrics. 
He codes in Python, R, and occasionally Rust and JavaScript, specifically using Python to manage data pipelines. He has a professional certification in full-stack development and holds a Master\u2019s degree in Economics, with a specialization in Economic, Digital and Data strategies from CNAM\u2019s department of Economics, Finance, Insurance and Banking.", "public_name": "Daniel Finnan", "guid": "756e621a-4e5c-5125-8d94-42cfbb4b5815", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/3US9H9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/7PNT37/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/7PNT37/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/7PNT37/resources/slides_YD9tPPL.pdf", "type": "related"}]}], "Platinum [2nd Floor]": [{"guid": "00324066-7a41-5ffc-b0a9-25453bbf0ad9", "code": "DQDEES", "id": 87689, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87689-state-of-in-browser-ml-webassembly-webgpu-and-the-modern-stack", "url": "https://pretalx.com/pyconde-pydata-2026/talk/DQDEES/", "title": "State of In-Browser ML: WebAssembly, WebGPU, and the Modern Stack", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "What if you could run real data/ML workflows right in your browsers - sandboxed, with no installation or sending your data anywhere? Such an approach would have tons of benefits: it is easy to distribute, safer by default, and can scale almost infinitely with virtually no infrastructure costs. \r\n\r\nThis talk is a pragmatic overview of the current in-browser ML stack. We\u2019ll cover what  workflows are realistic today (from training of traditional ML models to on-device LLM inference), how packaging/loading works, and the constraints one should be aware of. 
By the end of the talk you will have a clear sense of when in-browser ML is a good fit, and when it isn\u2019t.", "description": "Over the last few years, the tooling has matured enough to make \"ML in a tab\" worth taking seriously. Today, you can execute Python code in a sandboxed environment, ship interactive demos as a single URL, and even run LLM inference entirely on-device, without installations, servers, or sending data anywhere. In this talk, we will give a practical overview of the current in-browser ML stack, focusing on what is realistically possible today and the practical limits you still have to design around.\r\n\r\nWe will start with interactive environments such as JupyterLite and explain how they work under the hood via Pyodide: what it means to run CPython compiled to WebAssembly, how the filesystem and networking model differ from \"normal\" Python, and what that implies for performance, I/O, and package support. \r\n\r\nWe will then move from notebooks to applications with PyScript, showing how the same building blocks can be used to create shareable browser-based tools. We will also briefly cover the lower-level approach: using Pyodide directly and orchestrating it with JavaScript for granular control over loading, packaging, and data interchange.\r\n\r\nFinally, we will cover in-browser inference workflows for both traditional and deep learning models (via ONNX), and LLMs (via wllama and WebLLM), and discuss how WebGPU can accelerate these pipelines.\r\n\r\nBy the end of the talk, attendees will have a clear overview of the in-browser ML ecosystem and the practical intuition to decide whether it's the right choice for your next project.\r\n\r\n**Target Audience:** \r\nThis talk can be relevant for a broad audience. 
However, at least intermediate knowledge of ML / familiarity with Python ML ecosystem is required.\r\n\r\n**Outline:**\r\n- Introduction + Motivating examples [4 min]\r\n- Running Python in WebAssembly [6 min]\r\n    - Overview of Pyodide [2 min]\r\n    - Package management [3 min]\r\n    - Runtime and memory constraints [1 min]\r\n- Overview of interactive dev environments / JupyterLite [4 min] \r\n- Building applications with PyScript and direct Pyodide bindings [7 min]\r\n- On-device ML inference using ONNX, WebGPU, WebLLM, and wllama [5 min]\r\n- Q&A [4 min]", "recording_license": "", "do_not_record": false, "persons": [{"code": "NWAQCX", "name": "Oleh Kostromin", "avatar": "https://pretalx.com/media/avatars/NWAQCX_J60YTrz.webp", "biography": "I am a Data Scientist primarily focused on Deep Learning and MLOps. In my spare time I contribute to several open-source python libraries.", "public_name": "Oleh Kostromin", "guid": "68c7801e-b9b7-5c17-bc44-d5e705e5c269", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NWAQCX/"}, {"code": "EACXYX", "name": "Iryna Kondrashchenko", "avatar": "https://pretalx.com/media/avatars/EACXYX_merNsK0.webp", "biography": "Iryna is a data scientist and co-founder of DataForce Solutions GmbH. 
At DataForce, the team is building [LUML](https://luml.ai/), an open-source, end-to-end AIOps platform that lets teams track experiments, version models, deploy, and monitor\u2014all in one place.", "public_name": "Iryna Kondrashchenko", "guid": "70018355-f170-5858-9983-12cb340d312d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EACXYX/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/DQDEES/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/DQDEES/", "attachments": []}, {"guid": "1ab09024-50ad-5432-8710-bdd6c511c32a", "code": "VY3CY7", "id": 94651, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-94651-leveraging-hexagonal-architecture-when-building-applications", "url": "https://pretalx.com/pyconde-pydata-2026/talk/VY3CY7/", "title": "Leveraging Hexagonal Architecture When Building Applications", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Sponsored Talk", "language": "en", "abstract": "Hexagonal architecture in software development is a design pattern that has existed for more than 20 years and remains highly applicable today as we enter an era where LLMs are increasingly used as development tools. At a high level, it can be used to create distinct layers within an application, resulting in more maintainable and flexible code. One of the primary benefits of utilizing this architecture is the separation of concerns, allowing different components of software to be swapped as needed - whether that is business logic, database technologies, or external services. In this talk, I will discuss the benefits and practical applications of hexagonal architecture. 
I will also include a detailed walkthrough of how this pattern is implemented in a real-world application.", "description": "This talk will cover the following related to hexagonal architecture:\r\n\r\n**Introduction**\r\nThe hexagonal architecture design pattern, also known as \u201cPorts and Adapters\u201d, was introduced by Alistair Cockburn in the early 2000s. With the increase in usage of LLMs as software development tools, this design pattern can help create clear boundaries within applications and make code more understandable and modifiable by AI tools.\r\n\r\n**Core principles and concepts**\r\nIn this section, I will discuss the fundamental concepts that make hexagonal architecture effective. This includes, the central application core (business logic/domain), ports (interfaces that define contracts), and adapters (implementations that handle external interactions, for example interaction with a database or external services).\r\n\r\n**Benefits and problem-solving capabilities**\r\nThe discussion will highlight benefits including enhanced testability, improved maintainability by reducing coupling, and easier technology migration. I'll demonstrate how hexagonal architecture addresses common development pain points such as database lock-in, framework dependencies, and the challenge of writing effective unit tests.\r\n\r\n**Implementation and real-world case study**\r\nIncluded in this presentation will be a real-world case study of how hexagonal architecture is implemented in a production application. This example will demonstrate how to handle common scenarios such as database functionality, external API integration, and user management. 
The case study will show actual Python code, highlighting patterns for repository implementations, service layers, and adapter configurations.\r\n\r\n**Conclusion and Q&A**\r\nThe presentation concludes with the key takeaways, resources for further learning, and an interactive Q&A session.", "recording_license": "", "do_not_record": false, "persons": [{"code": "SAGEL8", "name": "Luke Gerstner", "avatar": "https://pretalx.com/media/avatars/GUZZ8D_S0LsxTj.webp", "biography": "I started my career as a data scientist in the oil and gas industry, where I worked on building services to deploy machine learning models in a production environment. Currently, I work as a software engineer at Rosenxt on a cloud backend team building a multi-tenant data management system. I am passionate about the combination of data science and software engineering and continuing to grow in both fields.", "public_name": "Luke Gerstner", "guid": "04a9f5ec-3d3e-5f2f-8b37-763815721e01", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/SAGEL8/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/VY3CY7/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/VY3CY7/", "attachments": [{"title": "Powerpoint slides", "url": "/media/pyconde-pydata-2026/submissions/VY3CY7/resources/Lever_5Q0Gwbm.pptx", "type": "related"}]}, {"guid": "b8c3c968-973b-547a-ae97-bfe6f83f0c10", "code": "F79RG9", "id": 88411, "logo": null, "date": "2026-04-15T15:00:00+02:00", "start": "15:00", "duration": "00:45", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-88411-scaling-data-processing-for-training-workloads-at-deepl-research-with-rust", "url": "https://pretalx.com/pyconde-pydata-2026/talk/F79RG9/", "title": "Scaling Data Processing for Training Workloads at DeepL Research with Rust", "subtitle": "", "track": "General: Rust", "type": "Talk (long)", "language": "en", "abstract": "This talk will detail how we used Rust to solve a number of resource utilization 
inefficiencies while scaling data pre-processing to a petabyte scale and enable next-generation model training at DeepL. Besides other factors, this was done by developing an internal library for interacting with Parquet files in a memory efficient nature.\r\n\r\nTopics include:\r\n\u2022 Convincing you to love Rust for its memory safety\r\n\u2022 Comparing C++ and Rust ecosystems for Python library development\r\n\u2022 Diving into Python-Rust interoperability\r\n\u2022 Convincing you to love Rust for its user-friendly (yes, actually!) language features\r\n\u2022 Providing a high-level overview of the continuously growing impact that Rust is having on the Arrow and data engineering ecosystem", "description": "We set out to replace an inefficient internal file format with an industry standard - a seemingly straightforward task. What we got instead was a descent into memory leak hell.\r\n\r\nThis talk will walk you through our journey of scaling DeepL's data preprocessing and model training pipelines to handle petabyte-scale corpora. 
When open-source C++-based Python libraries proved too unstable and memory-inefficient, we invested time and resources into developing our own Rust-based tooling and, compared to our previous internal file format, decreased memory load by a factor of 10 and latency until first byte read by a factor of 50.\r\n\r\nWhat we'll cover:\r\n\u2022 **Why Rust's memory safety guarantees matter in practice:** We will provide a direct comparison of our results using C++-based vs Rust-based implementations for data processing libraries.\r\n\u2022 **The Rust ecosystem advantage for Python interop:** While C++ offers a fragmented landscape of build systems and tooling choices, Rust provides a canonical path with cargo, maturin, and PyO3\u2014providing a clean interface for everything from GIL management to readable, zero-copy conversions between Rust and Python objects\r\n\u2022 **Rust's surprisingly friendly features:** Despite its reputation for having a steep learning curve, Rust offers language features that make it genuinely pleasant to work with, even for beginners coming from a Python background: from enums to pattern matching, error handling with Result, and cargo's canonical, ergonomic tooling.\r\n\u2022 **Rust's impact on the arrow ecosystem and data engineering with Python in general:** Besides the well-known impact that Rust-based data processing libraries like polars, Daft, and datafusion are having on the engineering ecosystem, we will show how the Rust implementation of Arrow called arrow-rs is having a growing impact and expanding the data engineering toolkit by powering an increasing number of great and contributor-friendly processing and introspection tools built in Rust.", "recording_license": "", "do_not_record": false, "persons": [{"code": "87HSSJ", "name": "Jonas Dedden", "avatar": "https://pretalx.com/media/avatars/87HSSJ_cdlm3q5.webp", "biography": "Hi, I'm Jonas Dedden, Staff Research Data Engineer at DeepL SE, Germany. 
Johanna Goergen and I work at the Research Data Platform team of DeepL Research, where we are responsible for the on-prem & cloud-based k8s compute infrastructure for petabyte scale data processing pipelines. We provide the platform that our Research Data Engineers can use to collect & preprocess all data needed for training the DeepL foundational language models that power our production services.", "public_name": "Jonas Dedden", "guid": "e6b6aa20-b94b-564f-b396-fc20f37f6123", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/87HSSJ/"}, {"code": "VBRTYZ", "name": "Johanna Goergen", "avatar": "https://pretalx.com/media/avatars/VBRTYZ_5lmmMMi.webp", "biography": "I'm a Staff Research Data Engineer in the Research Department of DeepL, working on platform-level tooling for scaling data pipelines to petabyte scale. I have been part of the initiative to adopt Rust in critical components used for model training, and I'm looking forward to sharing this experience with you.", "public_name": "Johanna Goergen", "guid": "7dde4506-be03-5c41-825d-789481b7c195", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VBRTYZ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/F79RG9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/F79RG9/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/F79RG9/resources/PyCon__7PA2LWP.pdf", "type": "related"}]}, {"guid": "d79a3f4a-cf86-5cff-b2e6-8061e3f50a33", "code": "BFYYQG", "id": 87732, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87732-build-a-web-coding-platform-with-python-run-in-webassembly", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BFYYQG/", "title": "Build a web coding platform with Python, run in WebAssembly", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Talk", "language": "en", "abstract": "Ever 
wanted to build a website that can run python, but you're worried about running user submitted code on your server?\r\nIn this talk I'll show how Holoviz Panel can create an interactive coding environment where students can write functions, solve exercises, and experiment safely, all while their code runs locally via WebAssembly.", "description": "### The Problem\r\nBuilding interactive Python learning platforms traditionally requires server infrastructure to execute user code, creating security risks and operational overhead. What if we could run Python entirely in the browser?\r\n\r\n### The Solution\r\nThis talk presents a coding platform built with Holoviz Panel that executes Python through WebAssembly via Pyodide. The entire application \u2013 UI and code execution \u2013 runs client-side, eliminating backend complexity while providing safe, isolated Python execution.\r\n\r\n### Architecture Overview\r\nThe platform combines three key technologies:\r\n- **Holoviz Panel** for building the interactive interface with its built-in code editor component\r\n- **Pyodide** for secure Python execution via WebAssembly in the browser\r\n- **LocalStorage** for persisting student progress without a database\r\n\r\n### Key Features\r\nThe platform supports multiple learning modalities:\r\n- Coding exercises validated against pre-defined test cases \u2013 from simple variable assignments to complete functions with return values or print statement\r\n- Interactive playground that evaluates expressions and captures output\r\n- Single and multiple-choice questions for concept checks\r\n\r\n### What You'll Learn\r\nThis talk covers the technical integration between Panel's UI framework and Pyodide's execution environment \u2013 the critical piece that makes browser-based Python coding work. 
Attendees will understand:\r\n- How to architect client-side Python applications\r\n- Running Panel components with Pyodide\r\n- Trade-offs between client-side and server-side execution\r\n- Handling code execution, output capture, and state management\r\n\r\n### Target Audience\r\nData scientists, educators, and developers interested in building interactive Python tools without server infrastructure. Basic familiarity with Python web frameworks is helpful but not required.\r\n\r\n### Background\r\nThis work originated from my bachelor thesis exploring Python education. The resulting platform demonstrates that WebAssembly enables entirely new architectures for Python applications \u2013 shifting from traditional server models to fully client-side execution.\r\n\r\nThe takeaway: You can build sophisticated Python applications that run anywhere there's a browser, with no backend server setup, no security concerns about arbitrary code execution, and no additional infrastructure costs.", "recording_license": "", "do_not_record": false, "persons": [{"code": "W3RRQT", "name": "Maris Nieuwenhuis", "avatar": "https://pretalx.com/media/avatars/W3RRQT_vHAuCRy.webp", "biography": "Junior Software Developer\r\nBachelor of Science in Media Informatics (Medieninformatik B. 
Sc.)\r\nGraduated from Berliner Hochschule f\u00fcr Technik in 2025\r\nPassionate about Python and programming in general, coding challenges and tutoring/education", "public_name": "Maris Nieuwenhuis", "guid": "d67b117c-3fd6-5000-8344-a12742b915f5", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/W3RRQT/"}], "links": [{"title": "Talk slides (google slides)", "url": "https://docs.google.com/presentation/d/1RQqUNh6MiGLBcnrXRMVsfzBeusHU8aag6_G-Xqc1_1M/edit?usp=sharing", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BFYYQG/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BFYYQG/", "attachments": [{"title": "Talk slides PDF", "url": "/media/pyconde-pydata-2026/submissions/BFYYQG/resources/WASM-C_YmIwLEF.pdf", "type": "related"}]}, {"guid": "dd17a2a0-daab-5273-ab52-4994b0908eee", "code": "X3KQMQ", "id": 87733, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87733-before-you-ship-your-agent-an-agent-builder-s-primer-on-jailbreaking-attacks", "url": "https://pretalx.com/pyconde-pydata-2026/talk/X3KQMQ/", "title": "Before You Ship Your Agent: An Agent Builder\u2019s Primer on Jailbreaking Attacks", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "Before you ship an AI agent to production, you need to understand how it can be broken. Jailbreaking and prompt injection attacks are not edge cases\u2014they are an inevitable consequence of deploying real-world, action-taking AI systems.\r\n\r\nThis talk is a practical primer on the most common ways agents fail under adversarial pressure. We\u2019ll break down how jailbreaking and prompt injection attacks actually work, including techniques such as excessive agency, prompt leakage, and weaknesses in vector search and embeddings. 
We\u2019ll examine why popular AI guardrails consistently fail in practice, and offer little more than a false sense of protection.\r\n\r\nWe\u2019ll also address a common misconception: the absence of major AI security incidents does not mean systems are safe. Instead, it reflects limited deployment, constrained agency, and cautious rollout. As organizations adopt browser agents, autonomous tools, and systems that can take real-world actions, these vulnerabilities quickly become critical attack surfaces.\r\n\r\nThis talk focuses on what organizations should do instead: applying proven security principles\u2014least privilege, isolation, monitoring, and abuse modeling\u2014adapted to the unique properties of AI systems. Attendees will leave with a clear understanding of the real risks, why they matter today, and the concrete steps to take before shipping an AI agent into production.", "description": "AI agents are rapidly moving from demos and copilots into production systems that browse the web, call APIs, execute workflows, and take real\u2011world actions. As this transition happens, a critical truth is becoming unavoidable: any agent with meaningful capability will be attacked\u2014and most are easy to break. Jailbreaking and prompt injection attacks are not theoretical research topics or rare edge cases; they are an inevitable outcome of deploying autonomous, instruction\u2011following systems in adversarial environments.\r\n\r\nThis talk is a practical, engineering\u2011focused primer on how AI agents fail under real\u2011world pressure, and what organizations must understand before shipping an agent into production. Rather than focusing on sensational examples or hypothetical risks, we will examine the concrete mechanisms that attackers use today, why they work, and why many popular defenses provide little real protection.\r\n\r\nWe begin with a clear, accessible overview of jailbreaking and prompt injection attacks. 
Attendees will learn how attackers manipulate model instructions, context windows, and tool\u2011calling behavior to override intended safeguards. We\u2019ll cover both direct prompt injection (explicitly malicious instructions) and indirect prompt injection, where hostile content is embedded in webpages, documents, emails, or user\u2011generated data that agents are designed to consume. These attacks are especially dangerous because they exploit normal, expected behavior rather than software bugs.\r\n\r\nFrom there, we\u2019ll explore several recurring failure modes that appear across nearly all production agent architectures:\r\n\r\nExcessive agency: Agents are often given broader permissions and autonomy than necessary, turning minor instruction hijacks into high\u2011impact incidents.\r\nPrompt leakage: System prompts, policies, secrets, and internal instructions are frequently exposed or inferable, providing attackers with a roadmap for further exploitation.\r\nVector and embedding weaknesses: Retrieval\u2011augmented generation systems can be poisoned or manipulated, allowing malicious content to outrank trusted sources and influence agent decisions.\r\nTool and browser abuse: Agents that browse the web or execute actions are uniquely vulnerable to hostile environments intentionally crafted to manipulate them.\r\nA key focus of the talk is why AI guardrails don\u2019t work the way many teams expect. We\u2019ll examine common approaches\u2014prompt\u2011based restrictions, content filters, and policy\u2011layer defenses\u2014and explain why they are brittle, bypassable, and often fail silently. 
Rather than stopping attacks, these mechanisms frequently create a false sense of security that masks deeper architectural risks.\r\n\r\nWe\u2019ll also address a common misconception in the industry: \u201cIf these vulnerabilities are so serious, why haven\u2019t we seen major AI security incidents yet?\u201d The answer is not that systems are safe, but that most deployments are still constrained\u2014limited autonomy, limited blast radius, and cautious rollout. As organizations move toward browser agents, long\u2011running autonomous workflows, and systems with real operational authority, the conditions that have so far prevented large\u2011scale incidents will disappear. When that happens, these attack classes will move from curiosity to crisis.\r\n\r\nThe final section of the talk focuses on what actually works. Instead of recommending yet another AI security product or guardrail framework, we will outline practical, proven steps organizations can take today, grounded in decades of security engineering experience:\r\n\r\nApplying least privilege and minimizing agent capabilities\r\nIsolating tools, credentials, and execution environments\r\nDesigning for failure and containment, not perfect prevention\r\nMonitoring agent behavior for abuse patterns rather than policy violations\r\nPerforming threat modeling that treats prompts and context as untrusted input\r\nAttendees will leave with a clear mental model of how AI agents are attacked, why these attacks succeed, and how to reduce risk without relying on ineffective silver bullets. 
This talk is intended for engineers, security practitioners, and technical leaders building or deploying AI agents who want to understand the real risks\u2014and take responsible action\u2014before putting these systems into production.", "recording_license": "", "do_not_record": false, "persons": [{"code": "AZ7FNH", "name": "Simonas \u010cerniauskas", "avatar": "https://pretalx.com/media/avatars/AZ7FNH_0u8WXHP.webp", "biography": "Dr.-Ing. Simonas \u010cerniauskas is the founder and CTO of tisix.io, specializing in developing practical LLM solutions for media and publishers. With a doctorate from RWTH Aachen and experience as a principal researcher at Research Center J\u00fclich, he combines deep technical expertise with hands-on implementation experience. His work focuses on multi-modal content generation and media processing. Drawing from his background in mechanical engineering, quality assurance and machine learning engineering, Simonas develops scalable AI solutions while maintaining a strong focus on quality assurance and risk management. 
He regularly shares insights through speaking engagements and technical publications, helping organizations navigate the complexities of AI implementation with practical, business-focused approaches.", "public_name": "Simonas \u010cerniauskas", "guid": "80386181-7e74-5611-bb46-c16ceeb97330", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/AZ7FNH/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/X3KQMQ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/X3KQMQ/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/X3KQMQ/resources/pycon__Ueg2oQp.pdf", "type": "related"}]}, {"guid": "b2b3f213-a933-5101-b676-b2fd647d4fb9", "code": "V7LQGR", "id": 86976, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-86976-don-t-let-imposter-syndrome-win-u-can-do-big-things-from-a-small-place-a-7-year-african-ai-journey", "url": "https://pretalx.com/pyconde-pydata-2026/talk/V7LQGR/", "title": "Don\u2019t Let Imposter Syndrome Win: U Can Do Big Things from a Small Place, A 7-Year African AI Journey", "subtitle": "", "track": "General: Community & Diversity", "type": "Talk", "language": "en", "abstract": "Imposter syndrome affects engineers everywhere, but underrepresented professionals often face amplified self-doubt due to geography, limited access, and systemic biases. In this talk, I share my 7-year journey as an African AI engineer building global impact from outside major tech hubs. From founding DataFestAfrica and leading remote AI opportunities to getting the attention of organizations like Huawei, MongoDB, McKinsey, and AnyScale, I\u2019ll show how community, mentorship, open source, media presence, and strategic partnerships can create opportunities and influence. 
Attendees will gain practical strategies to overcome self-doubt, expand their reach, and make a meaningful difference in tech, no matter where they are.", "description": "Imposter syndrome affects engineers worldwide, but for underrepresented professionals, geographic location, limited access to resources, and systemic biases can amplify its impact. Many talented engineers outside major tech hubs face self-doubt, missed opportunities, and barriers to career growth, even when they have the skills and vision to make meaningful contributions globally. Recognizing and addressing these challenges is crucial for building inclusive and diverse tech ecosystems.\r\n\r\nThis topic is particularly relevant for early- to mid-career engineers, community builders, and professionals navigating global tech ecosystems. Many in these groups experience self-doubt, uncertainty about career paths, and difficulty gaining visibility and recognition. Understanding how to overcome these challenges can empower them to take bold steps, make an impact beyond their immediate environment, and thrive despite systemic limitations.\r\n\r\nIn this talk, I share actionable strategies that helped me overcome imposter syndrome and build influence from any location. Drawing on my 7-year journey as an African AI engineer, I highlight how leveraging community building, open source contributions, mentorship, media engagement, and strategic partnerships can create meaningful opportunities. I illustrate these strategies through real-world examples, including founding DataFestAfrica, growing AI and MLOps communities with limited funding in emerging regions and collaborating with global organizations. 
Attendees will leave with practical insights to overcome self-doubt, expand their reach, and make a global impact from wherever they are.", "recording_license": "", "do_not_record": false, "persons": [{"code": "YET9G9", "name": "Gift  Ojeabulu", "avatar": "https://pretalx.com/media/avatars/YET9G9_0Fl7b8X.webp", "biography": "Gift Ojeabulu is a data scientist, AI/ML practitioner, and community builder with over six years of experience at the intersection of artificial intelligence, software engineering, and developer advocacy. He has led and scaled global AI communities, including growing Iterative.ai\u2019s community to over 30,000 data, ML, and AI professionals worldwide. Gift has curated hundreds of technical content pieces annually and has worked with AI startups such as Deci AI and DagsHub to shape developer relations and content strategies for highly technical audiences. He is a four-time AWS Community Builder in Machine Learning and AI, serving as a board advisor to DevNetwork (USA) in the areas of artificial intelligence and developer advocacy.\r\n\r\nAs the co-founder of Data Community Africa (DCA), the largest Black data and AI community on the continent, Gift has led initiatives that support education, open-source collaboration, and professional growth, including the African Data Community Newsletter, which reaches over 2,500 subscribers across 80 countries. He has contributed to major ecosystem efforts such as DatafestAfrica and leads the Lagos MLOps community, where he focuses on practical MLOps, large language models, and open-source AI development. 
Through his work, Gift actively advances Africa\u2019s data and AI ecosystem by connecting local talent to global opportunities and fostering sustainable innovation.", "public_name": "Gift  Ojeabulu", "guid": "ed19436a-9e2a-551e-8d2e-902f334e037a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/YET9G9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/V7LQGR/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/V7LQGR/", "attachments": []}], "Europium [3rd Floor]": [{"guid": "7903650c-3fae-55dd-bed7-9ceac38c89ea", "code": "3XDMXS", "id": 86854, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-86854-from-research-models-to-slas-operationalizing-tsfms-with-python", "url": "https://pretalx.com/pyconde-pydata-2026/talk/3XDMXS/", "title": "From Research Models to SLAs: Operationalizing TSFMs with Python", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk", "language": "en", "abstract": "Time series foundation models (TSFMs) such as Chronos, Lag-Llama, TimesFM, and Siemens\u2019 own GTT have shown strong generalization capabilities across diverse forecasting tasks. However, integrating these models into a large organization is primarily a software engineering and MLOps challenge rather than a modeling one.\r\n\r\nIn this talk, we present a real-world case study based on Siemens KPI Forecast, a Python-based forecasting platform that operationalizes multiple TSFMs as reusable, production-grade services. 
The platform integrates both open research models and Siemens-developed models behind a unified API, supporting zero-shot inference, fine-tuning jobs, and fine-tuned inference depending on user needs and operational constraints.\r\n\r\nWe focus on how Python is used to compose heterogeneous components including open and closed-source models, internal data products, APIs, and orchestration layers into a consistent time series specialist user experience. The session also covers challenges operating such services within a B2B environment, including issues related to monitoring, versioning, and governance.\r\n\r\nAttendees will gain practical insights into turning TSFMs into reliable Python services that scale across teams and use cases.", "description": "### Motivation\r\n\r\nTime series foundation models promise rapid prototyping and strong performance across domains, but many teams struggle to move beyond notebooks and benchmarks. In practice, the hardest problems are not model accuracy or architecture, but integration, operability, and developer experience.\r\n\r\nThis talk addresses a common but under-discussed question:\r\n\r\nHow do you operationalize time series foundation models inside a large organization with real users, real constraints, and real SLAs?\r\n\r\n### Case study context\r\n\r\nThe talk is based on hands-on experience building and operating Siemens KPI Forecast, a Python-based forecasting platform that exposes multiple TSFMs through stable APIs. The platform integrates:\r\n\r\n- Chronos ([https://arxiv.org/abs/2403.07815](https://arxiv.org/abs/2403.07815))\r\n- Lag-Llama ([https://arxiv.org/abs/2310.08278](https://arxiv.org/abs/2310.08278))\r\n- TimesFM ([https://arxiv.org/abs/2310.10688](https://arxiv.org/pdf/2310.10688))\r\n- GTT, a Siemens-developed large-scale time series model (https://arxiv.org/pdf/2402.07570.pdf)\r\n\r\nChronos, Lag-Llama, and TimesFM are open-source research models, while GTT is a proprietary Siemens model. 
The platform is designed to treat both open and closed-source models uniformly from a developer and user perspective.\r\n\r\n### Topics covered\r\n\r\n- Why TSFMs are easy to prototype but hard to operationalize\r\n- Designing Python APIs that unify multiple foundation models\r\n- Supporting zero-shot inference, fine-tuning jobs, and fine-tuned inference in one system\r\n- Integrating open-source and proprietary models consistently\r\n- Making forecasting services accessible to different user personas\r\n- Challenges related to operating ML services in a B2B environment including monitoring, versioning, and governance considerations\r\n\r\n### What attendees will learn\r\n\r\n- How to structure Python services around foundation models\r\n- How to avoid fragmentation when supporting multiple models and workflows\r\n- Practical MLOps patterns for operating ML services beyond notebooks\r\n- Lessons learned from running TSFMs at organizational scale\r\n\r\nThis session focuses on engineering and operational lessons that are broadly applicable to teams building Python-based ML platforms in both enterprise and open-source contexts. Model references are included for transparency; the talk focuses on system design and operational patterns rather than proprietary details.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NH38CT", "name": "Jeyashree Krishnan", "avatar": null, "biography": "Jeyashree Krishnan is a Senior Machine Learning Engineer at Siemens AG. Her work focuses on building and operationalizing scalable machine learning services, with an emphasis on foundation models and time series forecasting. 
She is also a Visiting Researcher at the Center for Computational Life Sciences, RWTH Aachen University.", "public_name": "Jeyashree Krishnan", "guid": "08c016d9-4a89-5259-a1d3-f83603e51b06", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NH38CT/"}, {"code": "DNYXSR", "name": "Catarina Filipe", "avatar": null, "biography": "Hi, I'm Catarina! I've been a Data Scientist at Siemens for the past 4 years. I'm now focused on the mission of unifying all time series related topics under one roof. I'm also happy to be back in Darmstadt, the city where I completed part of my Master's degree, to talk about Python.", "public_name": "Catarina Filipe", "guid": "4401e89a-559d-5ceb-88a6-9ae42d5d6851", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/DNYXSR/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3XDMXS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3XDMXS/", "attachments": []}, {"guid": "5b72e70c-08dc-5d7b-a81e-372bb6c894d5", "code": "YZM8TA", "id": 88308, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-88308-demystifying-agentic-ai-using-small-language-models", "url": "https://pretalx.com/pyconde-pydata-2026/talk/YZM8TA/", "title": "Demystifying Agentic AI Using Small Language Models", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "The AI world is buzzing with claims about \u201cagentic intelligence\u201d and autonomous reasoning. Behind the hype, however, a quieter shift is taking place: Small Language Models (SLMs) are proving capable of many reasoning tasks once assumed to require massive LLMs. 
When paired with fresh business data from modern lakehouses and accessed through tool calling, these models can power surprisingly capable agents.\r\n\r\nIn this talk, we cut through the noise around \u201cagents\u201d and examine what actually works today. You\u2019ll see how compact models such as Phi-2 or xLAM-2 can reason and invoke tools effectively, and how to run them on development laptops or modest clusters for fast iteration. \r\nBy grounding agents in business facts stored in Iceberg tables, hallucinations are reduced, while Iceberg\u2019s read scalability enables thousands of agents to operate in parallel on a shared source of truth.\r\nAttendees will leave with a practical understanding of data agent architectures, SLM capabilities, Iceberg integration, and a realistic path to deploying useful data agents - without a GPU farm.", "description": "The Agentic Buzz  -  What\u2019s Real, What\u2019s Marketing\r\n\r\n- The explosion of \u201cagentic\u201d frameworks and the confusion it causes\r\n- What an agent really is at its core: planning, acting, and reasoning\r\n\r\nAnatomy of an Agent\r\n\r\n- The three basic functions: task decomposition, tool use, and code synthesis\r\n- How frameworks like LangChain and Python make it easy to chain these together\r\n\r\n\r\nWhy Small Models Are Catching Up\r\n- Review of research from NVIDIA and Georgia Tech\r\n- Benchmarks showing SLMs matching or exceeding performance of larger LLMs\r\n- Cost, latency, and deployability tradeoffs\r\n\r\nHands-On Demo: Building and Running an Agent on a Laptop\r\n\r\n- Using LangChain and Python to orchestrate reasoning, tool calls, and code execution\r\n- Example workflow: \u201cPlan a dataset cleanup pipeline\u201d using an SLM\r\n- Observing resource use, latency, and performance in real time\r\n\r\n\r\nKey Takeaways and Open Research Directions\r\n\r\n- Opportunities for local and edge deployments\r\n- The emerging role of SLMs in allowing everyone to experiment with 
agents\r\n- Future questions: scaling reasoning vs. scaling models", "recording_license": "", "do_not_record": false, "persons": [{"code": "ZUECPC", "name": "Serhii Sokolenko", "avatar": "https://pretalx.com/media/avatars/ZUECPC_sFofzBE.webp", "biography": "Serhii Sokolenko is a co-founder of Tower, a Pythonic platform for data flows and agents running on top of open analytical storage. Prior to founding Tower, Serhii worked at Databricks, Snowflake and Google on data processing and databases.", "public_name": "Serhii Sokolenko", "guid": "c94dd5e4-64fe-536d-bfa5-f451831b6f65", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ZUECPC/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/YZM8TA/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/YZM8TA/", "attachments": []}, {"guid": "d014eb6d-2de7-544e-9790-77ce9b9e167a", "code": "3BYLZU", "id": 88072, "logo": null, "date": "2026-04-15T11:35:00+02:00", "start": "11:35", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-88072-building-secure-environments-for-cli-code-agents", "url": "https://pretalx.com/pyconde-pydata-2026/talk/3BYLZU/", "title": "Building Secure Environments for CLI Code Agents", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "AI code agents like Claude Code are powerful but require careful isolation. Learn how to run them in secure containers with persistent credentials, API logging, and complete filesystem isolation\u2014protecting your host system while maintaining full functionality.", "description": "AI-powered code agents like Claude Code can autonomously edit files, run commands, and interact with your development environment. This power comes with risks: unrestricted filesystem access, exposed credentials, and unmonitored API usage. 
How do you harness this capability safely?\r\n\r\nThis talk presents a practical containerization approach for running CLI code agents in complete isolation from your host system. You'll learn how to build secure environments that maintain persistent authentication, enable workspace access through volume mounts, and provide full API request logging, all while keeping the agent sandboxed.\r\n\r\nI'll demonstrate a production-ready setup using Docker containers that includes credential management, an API proxy for request logging and monitoring, and Datasette integration for analyzing API usage patterns. You'll see how to structure volumes for security, implement network isolation, and maintain developer productivity while enforcing safety boundaries.", "recording_license": "", "do_not_record": false, "persons": [{"code": "VED8E9", "name": "Harald Nezbeda", "avatar": "https://pretalx.com/media/avatars/VED8E9_ZZ5XdPl.webp", "biography": "Hi, my name is Harald and I'm a passionate Python developer interested in development, DevOps and AI. 
I'm currently located in Austria working as a Senior Software Developer and Python Technical Leader for [Anexia](https://anexia.com/).\r\n\r\nI also work on [Open Source Projects](https://nezhar.com/pages/projects) and write Articles and Tutorials on my [blog](https://nezhar.com/).", "public_name": "Harald Nezbeda", "guid": "7252aecc-bf37-59c0-8cf8-36a3e9684770", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VED8E9/"}], "links": [{"title": "Slides", "url": "https://nezhar.com/slides/pycon-de-26/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3BYLZU/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3BYLZU/", "attachments": []}, {"guid": "f6f38b7b-f313-5f97-b7e8-274cecfc5c9e", "code": "MJTQEJ", "id": 87719, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87719-mastering-the-hex-a-case-study-in-reinforcement-learning-for-strategy-games", "url": "https://pretalx.com/pyconde-pydata-2026/talk/MJTQEJ/", "title": "Mastering the Hex: A Case Study in Reinforcement Learning for Strategy Games", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "What does it take to build an AI that learns to play strategy games from scratch? Over the past year, I chose to explore this question out of personal fascination with game AI \u2014 as a seminar project for college, but really as a hobby. The result was a complete reinforcement learning environment for Antiyoy, a turn-based strategy game played on hexagonal grids.\r\n\r\nThe journey raised intriguing challenges: How do you represent hexagonal game boards for neural networks? What do you do when your AI has over 4,000 possible actions to choose from? How do you design rewards that teach strategy rather than just reward flailing in the right direction? 
This talk shares how these problems were approached using Python's modern ML ecosystem\u2014Gymnasium, PyTorch, and PPO training\u2014ultimately producing an agent that wins nine out of ten games against a random opponent. Whether that qualifies as \"strategic play\" is a question the agent and I still disagree on.\r\n\r\nWhether you're curious about building custom RL environments, interested in game AI, or just wondering what reinforcement learning actually looks like when it half-works, you'll leave with practical insights and a healthy dose of realistic expectations.", "description": "### Context and Motivation\r\n\r\nThis talk emerged from a year-long journey that began with a simple curiosity: could I teach a computer to play strategy games by itself? It started as a college seminar project, but the topic was chosen purely out of personal interest in reinforcement learning and game AI \u2014 this was a hobby from the start. Rather than working with pre-built environments like CartPole or Atari games, the goal was to understand the entire pipeline\u2014from implementing game mechanics to training a neural network that actually learns to win.\r\n\r\nThe game chosen was Antiyoy, a minimalist turn-based strategy game where players control territories on hexagonal grids, build units and structures, manage resources, and compete for dominance. While the game is simple enough to understand, it presents genuine strategic depth\u2014exactly the kind of challenge that makes reinforcement learning both difficult and rewarding.\r\n\r\nThe talk walks through the complete development process, focusing not on implementation minutiae but on the fundamental questions and design decisions that anyone building similar systems would encounter. You won't see walls of code or detailed mathematical derivations. 
Instead, you'll hear about the thinking process, the challenges faced, and the solutions that emerged\u2014all with the goal of demystifying what it actually takes to build a learning agent for complex games.\r\n\r\n---\r\n\r\n### What Will You Learn?\r\n\r\nThe talk is structured around three core challenges that define this kind of project, presented as questions that the work had to answer:\r\n\r\n**How do you turn a game into something a neural network can understand?**\r\n\r\nStrategy games aren't naturally suited for machine learning. Antiyoy is played on hexagonal grids, uses discrete turn-based actions, and involves complex state information\u2014territory ownership, unit positions, economic resources, and more. The talk explores how to bridge this gap: representing hexagonal coordinates in ways that computers can efficiently process, encoding complete game state into multi-channel observations similar to those used in AlphaZero, and designing observation spaces that preserve spatial relationships for convolutional networks. You'll hear about the choice between different coordinate systems, the challenge of maintaining game history for temporal reasoning, and how to normalize diverse information types (positions, money, turn counts) into a coherent input for neural networks.\r\n\r\n**How do you handle massive action spaces without overwhelming your AI?**\r\n\r\nWhen your agent has more than 4,000 possible actions at any given moment\u2014moving units to different positions, building various types of units and structures, or ending the turn\u2014training becomes a serious challenge. Most of these actions are illegal at any given time, yet a naive approach would force the agent to learn this the hard way. The talk discusses how action masking solves this problem by dynamically filtering the action space to only legal moves, dramatically improving learning efficiency. 
You'll understand why this technique is crucial for games with complex rules and how it fundamentally changes the training dynamics compared to environments where every action is always available.\r\n\r\n**How do you design rewards that actually teach strategy?**\r\n\r\nPerhaps the most subtle challenge in reinforcement learning is reward design. Give an agent only a +1 for winning and -1 for losing, and it may take forever to figure out what behaviors lead to victory. But add too many intermediate rewards, and you risk the agent exploiting shortcuts rather than learning genuine strategy. The talk shares the experimentation process: starting with sparse rewards as a baseline, carefully introducing intermediate signals for meaningful actions like territory expansion and economic development, and ultimately landing on a reward structure that accelerated learning while still encouraging strategic play. You'll see how reward shaping influenced training speed and final performance, and learn to think about reward design as a crucial part of the development process rather than an afterthought.\r\n\r\n---\r\n\r\n### What Are the Results and Takeaways?\r\n\r\nAfter training over several thousand episodes\u2014which took about eight hours on a consumer-grade GPU\u2014the agent learned to win approximately nine out of ten games against a baseline random opponent. To be precise about what that means: the baseline picks uniformly from legal moves, so the bar is not high. The trained agent makes progress through a game, expands territory, and occasionally does things that look like they could be intentional. It also makes plenty of moves that defy easy explanation. \"Strategy\" might be a generous word; \"learned to flail more purposefully\" is closer to the truth.\r\n\r\nThe talk concludes by reflecting on what worked well and what proved unexpectedly difficult\u2014and what is still unresolved. 
Action masking emerged as perhaps the single most impactful technique for managing complexity. The choice of observation space design\u2014borrowing ideas from AlphaZero's approach to representing board games\u2014turned out to be well-suited for the problem. Training infrastructure using MLflow provided invaluable insight into the learning process and made experimentation much more manageable. On the challenging side: reward design required multiple iterations and still produced an agent that plays competently but not strategically. The gap between \"beats random\" and \"actually plays well\" is humbling and, it turns out, enormous.", "recording_license": "", "do_not_record": false, "persons": [{"code": "VX8JGC", "name": "Simon Hedrich", "avatar": "https://pretalx.com/media/avatars/VX8JGC_Tcsgwbs.webp", "biography": "Simon Hedrich is a computer scientist and AI enthusiast currently completing his Master\u2019s degree in Computer Science. His academic and professional journey is marked by a deep interest in bridging the gap between theoretical research and practical AI engineering.\r\n\r\nThrough his work at inovex GmbH, Simon has demonstrated expertise in specialized areas of Artificial Intelligence, including computer vision and the use of synthetic data to enhance small object detection. 
His technical writing highlights his ability to leverage generative AI models, such as Stable Diffusion, to solve complex real-world challenges like training data scarcity.", "public_name": "Simon Hedrich", "guid": "8234c23d-4114-5632-9511-585cb42e0d09", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VX8JGC/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/MJTQEJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/MJTQEJ/", "attachments": []}, {"guid": "17f88737-1b12-58d7-bd4e-641ac7c6e9ab", "code": "JBFGCA", "id": 85346, "logo": null, "date": "2026-04-15T15:00:00+02:00", "start": "15:00", "duration": "00:45", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-85346-building-agentic-systems-with-python-langgraph-mcp-and-a2a", "url": "https://pretalx.com/pyconde-pydata-2026/talk/JBFGCA/", "title": "Building Agentic Systems with Python, LangGraph, MCP, and A2A", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk (long)", "language": "en", "abstract": "Building an agentic system that collects and evaluates company information in real time\u2014without curated datasets\u2014requires solving difficult challenges in data acquisition, quality control, and agent orchestration.\r\n\r\nThis talk outlines the solution design for such a system, implemented with Python-based tooling including LangGraph, and emerging protocols such as A2A and MCP, within a multi-agent workflow. 
Because MCP and A2A are still new and lightly documented, we will share implementation lessons and a practical example of a hub-and-spoke architecture based on a recent real-world system.\r\n\r\nAttendees will learn architectural patterns for multi-agent systems, common pitfalls of using MCP/A2A in real-world scenarios, and strategies for maintaining data quality in agent-based workflows.", "description": "# What we are going to show\r\n\r\n- A live demo of a Python-based multi-agent system that retrieves, aggregates, and evaluates company information in real time.\r\n- The overall solution architecture: how LangGraph, MCP, A2A, and custom Python components fit together.\r\n- Key implementation lessons from building the system, covering both technical and business challenges.\r\n\r\n# What problem is our talk addressing\r\n\r\nAI analysis depends heavily on data. When systems cannot rely on pre-collected or curated datasets, developers must find, collect, and validate data of sufficient quality.\r\n\r\nAt the same time, emerging technologies such as MCP, A2A, and LangGraph are evolving quickly, with limited documentation, occasional breaking changes, and examples that rarely scale beyond minimal tutorials. Applying these tools to real-world Python applications introduces challenges in design, orchestration, versioning, and error handling that are not yet widely discussed.\r\n\r\n# Why is the problem relevant to the audience\r\n\r\nMany Python developers and data practitioners will soon need to build systems that combine LLMs, external data sources, and multi-agent logic, without relying on static datasets. This talk provides practical guidance for designing such systems using open-source Python tooling.\r\n\r\nThe presented solution is designed with a modular, scalable component approach. 
MCP and A2A protocols facilitate the connection between AI-related solutions, and this design demonstrates re-usable patterns for implementation.\r\n\r\nBy sharing our approach, design choices, and implementation pitfalls, the talk equips attendees to anticipate challenges early, evaluate whether MCP/A2A are appropriate for their own projects, and build more robust agentic systems.\r\n\r\n# What is our solution to the problem\r\n\r\nOur solution has split responsibilities in several blocks, though the overall idea is to present with code examples a Python system that combines LangGraph, A2A and MCP:\r\n\r\n- Data access via MCP servers\r\nMCP servers retrieve data from multiple sources (e.g., LinkedIn APIs, web scraping endpoints, Perplexity research). Using MCP makes it easy to plug in new data sources and manage them consistently. We demonstrate how to build and connect MCP servers in Python.\r\n- Data processing via LangGraph agents\r\nA set of agents implemented in LangGraph handle tasks such as coordinating the workflow, collecting company data, calculating evaluation scores, and validating results. These agents operate in a hub-and-spoke pattern centered around a coordinator agent. We show how this is implemented in Python using LangGraph.\r\n- Inter-agent communication via A2A\r\nAgents exposes capability \u201ccards,\u201d which the coordinator aggregates into a registry. An intent-detection step determines which agents should be invoked to answer a user's request. We demonstrate how A2A can be applied in Python to orchestrate agents effectively.\r\n- Data validation agent\r\nA dedicated validation agent checks retrieved data against defined rules to ensure quality. While no internet-sourced data is perfect, this approach significantly increases reliability. 
We show how validation logic is implemented within the LangGraph flow.\r\n- Scalability through configuration and deployment\r\nA centralized configuration file and simple Docker-based deployment make the system easy to scale and adapt. We explain how environment variables and shared configuration patterns can coordinate the various Python components.\r\n\r\n# What are the main takeaways from our talk\r\n\r\nAttendees will learn:\r\n\r\n- How to design and implement a practical multi-agent architecture using Python, LangGraph, MCP, and A2A.\r\n- How to acquire and validate external data dynamically without relying on curated datasets.\r\n- Common pitfalls and lessons from using MCP and A2A in larger-scale systems.\r\n- How to structure agent roles, orchestration flows, and validation strategies for scalable, extendable AI systems.", "recording_license": "", "do_not_record": false, "persons": [{"code": "K9VNNX", "name": "Holger N\u00f6sekabel", "avatar": "https://pretalx.com/media/avatars/K9VNNX_QvQEiJL.webp", "biography": "Holger N\u00f6sekabel has deep experience in data ecosystems, applied data science, and building production-grade systems with multidisciplinary teams. As CTO at TD Reply, he leads more than 20 engineers, data scientists, and visualization specialists in developing internal data products and delivering complex analytics projects for global Fortune 500 companies.\r\n\r\nBefore taking on the CTO role, Holger served as Director of Technical Consulting, supporting engineering teams and advising major brands on data-driven strategy. 
He is also a Certified ScrumMaster and an advocate for practical, team-focused agile practices.\r\n\r\nHolger enjoys working at the intersection of data, product development, and real-world impact - bringing technical insights to diverse audiences and helping teams turn ideas into reliable, scalable solutions.", "public_name": "Holger N\u00f6sekabel", "guid": "2f4ed7a1-a121-5258-bc6a-63cadf24bda8", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/K9VNNX/"}], "links": [{"title": "GitHub Repository", "url": "https://github.com/hnoesekabel/ai_agents_pycon2026", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/JBFGCA/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/JBFGCA/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/JBFGCA/resources/260415_GTL3Ozf.pdf", "type": "related"}]}, {"guid": "15bf630e-92cf-5826-b097-029a26fbdccb", "code": "XGL37G", "id": 87679, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87679-tidy-finance-in-practice-how-explicit-assumptions-avoid-bad-investment-strategies", "url": "https://pretalx.com/pyconde-pydata-2026/talk/XGL37G/", "title": "Tidy Finance in Practice: How Explicit Assumptions Avoid Bad Investment Strategies", "subtitle": "", "track": "General: Others", "type": "Talk", "language": "en", "abstract": "Many investment strategies look convincing because they performed well in the past, but these results are often easy to misread and do not always say much about how the strategy would work in the future. In many cases, strong backtest results come not from real skill or insight, but from hidden rules, unclear data choices, or unrealistic assumptions. In this talk, I show how Tidy Finance principles help make these issues visible and easier to examine. 
Using clear examples from Tidy Finance with Python, I demonstrate that once assumptions are made explicit, many impressive results no longer hold up.", "description": "Many investment strategies look great because they performed well in the past. However, it is often unclear why they work or whether they would still work in the future. Strong backtest results are frequently driven by hidden assumptions, unclear data handling, or unrealistic rules rather than real skill or insight.\r\n\r\nIn this talk, I show how Tidy Finance principles help people better understand what is actually happening inside a financial backtest. Tidy Finance has become a popular open-source teaching and learning platform for empirical financial research. Its core idea is simple: financial analyses should be built from clear, well-structured data that makes assumptions easy to see and results easy to reproduce. \r\n\r\nUsing explicit examples from Tidy Finance with Python during the talk, I go through a real backtesting workflow and show how it changes when assumptions are written down clearly instead of being hidden inside the code. I demonstrate how small, often overlooked choices can have a large impact on results, and how these effects become visible when the analysis is structured cleanly. The focus is on learning how to read and question backtests, not on presenting new models or strategies.", "recording_license": "", "do_not_record": false, "persons": [{"code": "XTMJSZ", "name": "Christoph Frey", "avatar": "https://pretalx.com/media/avatars/XTMJSZ_YitBdr1.webp", "biography": "Christoph Frey is a Quantitative Researcher and Portfolio Manager at a family office in Hamburg and Research Fellow at the Centre for Financial Econometrics, Asset Markets and Macroeconomic Policy at Lancaster University. Before this, he was the leading quantitative researcher for systematic multi-asset strategies at Berenberg Bank and worked as an Assistant Professor at the Erasmus Universiteit Rotterdam. 
Christoph published research on Bayesian Econometrics and specializes in financial econometrics and portfolio optimization problems.", "public_name": "Christoph Frey", "guid": "e79a3c72-d1e3-5b7d-9518-f3384d0dc175", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/XTMJSZ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/XGL37G/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/XGL37G/", "attachments": [{"title": "tidy finance backtest", "url": "/media/pyconde-pydata-2026/submissions/XGL37G/resources/tidy_f_ZjMPP5q.pdf", "type": "related"}]}, {"guid": "6395c017-cdad-57ac-b7ac-ff8993381c19", "code": "8YTYEN", "id": 87166, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87166-octopus-automl-extracting-signal-from-small-and-high-dimensional-data", "url": "https://pretalx.com/pyconde-pydata-2026/talk/8YTYEN/", "title": "Octopus AutoML: Extracting Signal from Small and High-Dimensional Data", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "Many machine learning tools assume abundant, independent data, rely on a single data split plus cross-validation, and leave test-set separation to the user.\r\n\r\nIn application-driven domains such as industrial materials science and pharmaceutical development, data are scarce, high-dimensional, and often correlated, creating conditions under which standard ML pipelines frequently fail. Small datasets are highly sensitive to the random seed used for splitting, and common pitfalls such as feature selection before splitting or distributing correlated samples across train and test sets cause data leakage and inflated performance metrics.\r\n\r\nOctopus is an open-source Python AutoML library explicitly designed for small-data, high-dimensional regime. 
It enforces strict nested cross-validation for model and hyperparameter selection, quantifies performance variability across multiple splits, and tightly controls data leakage. Its modular architecture embeds an internal ML engine, several feature selection methods (e.g., MRMR, Boruta), and external AutoML solutions such as AutoGluon into a unified, rigorous validation framework, enabling systematic and fair comparison of methods on limited data. In addition, Octopus supports survival analysis, addressing time-to-event problems common in healthcare and materials science. This talk will use realistic small-scale datasets to illustrate how conventional pipelines can be misleading and how to obtain more reliable models when every sample matters.", "description": "Many machine learning tools are based on the quiet assumption that data is plentiful, independent, and identically distributed, and that a random training/testing split, plus a little cross-validation, is \u201cgood enough\u201d. In application-driven domains such as pharmaceutical development and industrial materials science, however, this is often not the case. Synthesizing a new compound can take months and early phase clinical trials are small, so we often work with fewer than 1,000 samples and several thousands of features. In this context, standard AutoML practice can be dangerously optimistic.\r\n\r\nOn small datasets, performance can vary significantly depending on the random seed used for splitting the data. Working with a single split exposes us to this randomness: with an unlucky seed we might prematurely abandon promising experiments, while a particularly favorable seed can lead to overestimating the true performance. Another major risk is data leakage, such as performing feature selection before splitting the data, or distributing correlated samples (e.g., repeated measurements from the same patient or material batch) across both training and test sets. 
Such leakage inflates evaluation metrics and produces models that fail to generalize to new data.\r\n\r\nOctopus is an open-source Python AutoML library designed specifically for small and high-dimensional datasets. Its core idea is simple: make statistically honest evaluation the default. Octopus enforces strict nested cross-validation, with an inner loop for model and hyperparameter selection and an outer loop that provides generalization performance estimates. Thanks to this nested setup, users also obtain an estimate of how much performance varies across multiple data splits; low variation increases trust in the reported results. Furthermore, because Octopus handles the entire data-splitting process and is carefully designed to avoid information leakage, the reported metrics are far less likely to be inflated.\r\n\r\nOur library provides a robust drop-in replacement for existing machine learning workflows, ensuring a principled implementation of nested cross-validation while leveraging advanced machine learning techniques in the background. Adopting a modular architecture, the library offers a dedicated, internally developed ML module, seamless integration of several feature selection methods (e.g., MRMR, Boruta), and support for external ML solutions such as AutoGluon. This modular design makes Octopus a powerful platform for benchmarking different methods and solutions on specific datasets and use cases, helping users systematically compare and select the most suitable approach for their problem\r\n\r\nOctopus also supports time-to-event (survival) problems, which are common healthcare (e.g. time to progression or death) and in materials science (e.g. time to failure or degradation). Survival models are evaluated using appropriate metrics within the same nested cross-validation framework. 
\r\n\r\nThis talk will demonstrate, using realistic small-scale datasets, how standard AutoML pipelines can report deceptively strong performance and how these metrics change when proper nested cross-validation and domain-aware splits are applied. Attendees will learn where typical mistakes originate and how Octopus establishes practical safeguards against them. The goal is straightforward: to produce better models and more reliable conclusions when data are scarce and every sample matters.", "recording_license": "", "do_not_record": false, "persons": [{"code": "EYAZMY", "name": "Nils Haase", "avatar": null, "biography": "Nils is Lead Data Scientist at Merck KGaA, Darmstadt, Germany, where he builds and productionizes machine learning solutions in Python. He earned his PhD in Physics from Universit\u00e4t Augsburg and has his background in R&D and material development. This path allows him to bridge domain-heavy lab and engineering problems with modern ML tooling, turning complex industrial data into robust, deployable systems.", "public_name": "Nils Haase", "guid": "5792df56-295c-5e08-bb69-92ae677b0d10", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EYAZMY/"}, {"code": "TLZK97", "name": "Andreas Wurl", "avatar": null, "biography": "Lead Data Scientist at Merck Healthcare KGaA \r\nClinical Measurement Sciences, Biomarker development\r\n\r\n\r\nsee Linkedin", "public_name": "Andreas Wurl", "guid": "c4a16629-bc7f-5d87-a35e-d457640dfbd1", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/TLZK97/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/8YTYEN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/8YTYEN/", "attachments": [{"title": "Octopus AutoML, slide deck", "url": "/media/pyconde-pydata-2026/submissions/8YTYEN/resources/20260_sjQiIwy.pptx", "type": "related"}]}, {"guid": "0f24c906-9a6c-5b0c-884a-b6664d12dcd8", "code": "K9LCNQ", "id": 88260, "logo": null, "date": "2026-04-15T17:35:00+02:00", 
"start": "17:35", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-88260-heat-scaling-the-python-scientific-stack-to-hpc-systems", "url": "https://pretalx.com/pyconde-pydata-2026/talk/K9LCNQ/", "title": "Heat: scaling the Python scientific stack to HPC systems", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "Python\u2019s scientific stack (NumPy/SciPy) is often confined to single-node execution. When datasets exceed local memory, researchers face a steep learning curve, typically choosing between complex manual distribution or the overhead of task-parallel frameworks.\r\n\r\nIn this talk, we introduce [Heat](https://github.com/helmholtz-analytics/heat), an open-source distributed tensor framework designed to bring high-performance computing (HPC) capabilities to the scientific Python ecosystem. Built on PyTorch and mpi4py, Heat implements a data-parallel model that allows users to process massive datasets across multi-node, multi-GPU clusters (including AMD GPUs) with minimal code changes.\r\n\r\nWe will discuss the design and architecture enabling \"transparent distribution\":\r\n\r\n- Heat\u2019s distributed n-dimensional array for data partitioning and communication under the hood;\r\n- The synergy of PyTorch as a high-performance compute engine and MPI for efficient, low-latency communication;\r\n- Scaling efficiency, encompassing both strong and weak scaling for memory-intensive operations;\r\n- Fundamental building blocks\u2014from linear algebra to machine learning\u2014re-implemented for distributed memory space.\r\n\r\nAttendees will learn how to leverage the cumulative RAM of supercomputers without leaving the familiar NumPy-like interface, effectively removing the \"memory wall\" for large-scale scientific analytics.", "description": "**Memory bottleneck in scientific computing (4 minutes)**\r\n - Limitations of single-node libraries\r\n - Complexity 
of existing workarounds: trade-offs between manual MPI programming (high developer effort) and task-parallel frameworks \r\n - The data-parallel alternative: performing uniform operations on distributed slices of a global tensor.\r\n\r\n**Architecture and implementation (8 minutes)**\r\n- The DNDarray structure: Technical breakdown of the distributed n-dimensional array, which provides a global logical view while managing local physical storage across MPI ranks.\r\n- The split axis concept: How data is partitioned along specific dimensions (e.g., rows or columns) to optimize communication for different mathematical operations.\r\n- Backend synergy: \r\n  - PyTorch as the compute engine for high-performance local tensor operations and GPU acceleration.\r\n  - mpi4py for communication in cluster environments.\r\n- Hardware interoperability: Transparent execution across CPUs and GPUs, including NVIDIA (CUDA) and AMD (ROCm) accelerators.\r\n\r\n**Algorithmic building blocks for distributed memory (8 minutes)**\r\n- Communication-aware linear algebra: Distributed matrix-matrix multiplication and its communication costs. 
Advanced matrix decomposition methods, such as hierarchical and randomized SVD (hSVD), for massive datasets.\r\n- Scalable machine learning and statistics: Example: clustering (K-Means) and Principal Component Analysis (PCA) on distributed arrays.\r\n- Temporal analysis using Dynamic Mode Decomposition (DMD) on large-scale scientific data like global wind speeds.\r\n\r\n**Performance and scaling efficiency (7 minutes)**\r\n- Scaling methodologies: strong scaling (speedup for a fixed problem size) and weak scaling (efficiency as both problem size and resources grow).\r\n- Memory wall removal: Utilizing the cumulative RAM of many cluster nodes to process datasets that are otherwise impossible to load.\r\n- Case studies: Reviewing performance results from large-scale runs\r\n\r\n**Summary and project roadmap (3 minutes)**\r\n- Key takeaways\r\n- Upcoming features\r\n- Open-source community", "recording_license": "", "do_not_record": false, "persons": [{"code": "JKT3YT", "name": "Claudia Comito", "avatar": "https://pretalx.com/media/avatars/JKT3YT_Em42plg.webp", "biography": "I work in the Large-Scale Data Science division at the J\u00fclich Supercomputing Centre (JSC), and I lead the development of Heat, an open-source distributed tensor framework designed for high-performance data analytics. My work focuses on scaling scientific Python applications across multi-node, multi-GPU clusters.\r\n\r\nMy background is in astrophysics, I joined JSC in 2018 to co-design distributed analytics for scientific domains including aerospace and Earth system modeling. Since 2021, I have led the Heat project, focusing on technical user support, community growth, and project dissemination.", "public_name": "Claudia Comito", "guid": "8aa82eb8-2b83-5027-a2d2-051de14cb728", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/JKT3YT/"}, {"code": "BELFDP", "name": "Thomas Saupe", "avatar": null, "biography": "I do research and software engineering at J\u00fclich Supercomputing Centre. 
My interests are primarily doing numerics with Python, but I accept that I have to be interested in ML now as well.", "public_name": "Thomas Saupe", "guid": "c14cd22e-b761-5593-8cbe-8efeadcb9657", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BELFDP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/K9LCNQ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/K9LCNQ/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/K9LCNQ/resources/Heat_p_ouEFQAY.pdf", "type": "related"}]}], "Palladium [2nd Floor]": [{"guid": "42f14225-1ba2-5f8f-bb9e-f9c3d631665a", "code": "APWGQB", "id": 86419, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-86419-from-pixel-to-payouts-a-multi-agent-system-for-real-time-insurance-claims-processing", "url": "https://pretalx.com/pyconde-pydata-2026/talk/APWGQB/", "title": "From Pixel to Payouts: A Multi-Agent System for Real-Time Insurance Claims Processing", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "The traditional process for auto damage evaluation is relatively slow, subjective, and prone to fraud. With this presentation, the goal is to show a Multi-Agent System designed for the automation and standardization in real-time of the car damage evaluation, disrupting the initial claims workflow. The system is built around an Orchestrator Agent with the role to coordinate specialized AI agents: a Vision Agent (powered by OpenAI GPT-5.2) for damage analysis and severity classification, two Cost Estimation Agents (powered by Perplexity's sonar-pro) to provide comparative quotes (OEM vs. Aftermarket), and a Shop Finder Agent for local repair options. 
The system produces a report that includes a description of the damage, severity, comparative repair costs in local currency, and recommended repair shops, all embedded into a Gradio/Streamlit interface. The task of this approach is to reduce the processing time, improve transparency for customers, and provide insurers with objective data to enable faster claims resolution.", "description": "**Project Goal and Business Impact**\r\nImagine filing an auto insurance claim. Instead of waiting days for a damage evaluation, photograph the car with your phone and, within minutes, receive a detailed assessment.\r\nThe primary objective of this project is to drastically improve the efficiency and objectivity of the initial auto insurance claim process. Current methods rely heavily on human adjusters and manual estimates, resulting in delays and potential cost inflation. By deploying a sophisticated Multi-Agent System, the aim is to provide a fastly, data-driven assessment that benefits both the insurer and the customer.\r\n**The Multi-Agent Architecture**\r\nAt the heart of this solution, there is an orchestrated system of specialized AI agents, each with a distinct role. The architecture follows a sketch where an Orchestrator Agent works as the brain, creating execution plans, managing agent lifecycle, coordinating the execution, and aggregating results into coherent outputs.\r\nThe Vision Agent, powered by OpenAI GPT-5.2, acts as the system's eyes. It analyzes uploaded damage photos with technical precision, identifying specific damaged parts (bumpers, panels, headlights, etc.), classifying severity levels (minor, moderate, severe), categorizing damage types (collision, scratch, dent, paint damage), and generating detailed technical assessments. \r\nTwo specialized Cost Estimation Agents run, representing different repair philosophies. 
The OEM (Original Equipment Manufacturer) Agent focuses on premium repairs using manufacturer-certified parts from authorized dealers, while the Aftermarket Agent explores cost-effective alternatives using quality certified aftermarket parts from independent shops. Both agents are powered by Perplexity's sonar-pro model, which provides access to current market data and pricing information. \r\nThe Shop Finder Agent searches for repair facilities near the user's location, provides contact information, ratings, and availability, and adapts its search strategy based on the information retrieved.\r\n**Technical Highlights**\r\nThe system is built in Python, leveraging several key technologies. The Gradio/Streamlit framework provides an intuitive web interface for image upload, location input, and real-time results display. OpenAI's GPT-5.2 handles computer vision tasks. Perplexity's sonar-pro model accesses current market data for repair costs and local business information. \r\nA sophisticated state management system provides each agent with memory of past interactions, confidence scores to assess decision quality, performance tracking to optimize the system, and context-aware autonomous decision-making. \r\nAt the core of each agent's execution is the ReAct loop: a Reasoning, Action, Observation cycle. Each agent doesn't just call an API and return a result; it first records a thought explaining why it's taking an action, executes the action, and then logs its observations. This trace is accumulated across all agents and surfaced in the UI as a collapsible reasoning log, making every decision in the pipeline fully auditable and transparent.\r\n**Generative AI vs. Manual/Traditional Tools**\r\nWhile traditional automated tools rely on rigid, rule-based computer vision and static databases, this Multi-Agent System introduces a modular reasoning layer that bridges the gap between raw data and decision-making. 
According to industry research from McKinsey (2025), agentic workflows reduce claim cycle times from days to seconds with consistency in claim evaluations.\r\nTraditional tools are often \"black boxes\" or monolithic scripts; instead, this modular architecture gives the opportunity to develop every task in the future as a swappable module for a hybrid framework where every single agent can be replaced by a non-generative-AI tool, for a flexible, custom and scalable solution.
"Slides", "url": "/media/pyconde-pydata-2026/submissions/APWGQB/resources/PyconD_r5ygAON.pdf", "type": "related"}]}, {"guid": "e4e2ee48-62e9-5b84-aff3-bc0656201909", "code": "S7KYEE", "id": 88386, "logo": null, "date": "2026-04-15T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-88386-no-you-can-t-eval-your-way-to-fairness", "url": "https://pretalx.com/pyconde-pydata-2026/talk/S7KYEE/", "title": "No, you can't 'eval' your way to fairness", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Talk", "language": "en", "abstract": "Fairness is fundamentally not tractable to classic optimisation techniques. It's not a state of the world, it's an experience of it. No technology is fair in a vacuum - fairness can only be understood when a technical system collides with humans.\r\n\r\nWe're seeing a wave of off-the-shelf libraries measuring bad behaviours in LLM outputs, often simplifications of older fairness metrics. They can catch obvious failure modes like slurs. But this is one failure mode among many. Installing a library and calling the job done is fairness washing. The harder, more fruitful approach is to explore the space of failure modes, consider what an ideal world would look like, and design measures, mitigations, and feedback loops accordingly.\r\n\r\nThis is a talk for people who suspect we can't optimise our way to human dignity.", "description": "**Cold open**\r\nFairness is fundamentally not tractable to classic optimisation techniques.\r\n\r\n**The exposition**\r\nFairness is not a state of the world, it's an experience of it. No technology is fair in a vacuum. Fairness can only be understood when a technical system collides with humans in the world. It is felt as much as it is calculated. 
We can look at statistical results in aggregate to understand patterns, but these do not tell the story of the individual.\r\n\r\nFurther, attempting to optimise numerical fairness metrics is fundamentally coercive and technocratic: putting our thumb on the scale globally, injecting \"positive bias\" into single dimensions, framing fairness as a data problem rather than a problem of human dignity. It's a \"one metric to rule them all\" approach that fails to acknowledge differences in preference, culture, experience. To build systems that support human agency we must first abandon our idea of a single moral machine which consistently outputs correct answers from inputs and algorithms. Any system treating people as fungible or undifferentiated is structurally unfair.\r\n\r\nWhat might consent-based fairness look like instead? Asking \"Do you want extra help?\", making sure individual preferences and self-reported disadvantage can add a layer of human respect into the equation. But we rarely see even this. Instead we see universalist design that decides what's good for people without consulting them - the same pattern that Design Justice critiques as erasing those who experience intersectional disadvantage.\r\n\r\nWhat does this have to do with evals? We're seeing a wave of off-the-shelf libraries measuring bad behaviours in LLM outputs, often simplifications of older fairness metrics. And yes, they can catch obvious failure modes like slurs in outputs. But this is one failure mode among many. Installing a library and calling the job done is fairness washing. The harder, more fruitful approach is to explore the space of failure modes, consider what an ideal world would look like, and design measures, mitigations, and feedback loops accordingly. It also means grappling with the fact that we cannot avoid doing harm. 
What we can do is harm reduction, humility, and striving toward something better while acknowledging the impossibility of the task.\r\n\r\n**Third act**\r\nThis talk won't offer easy answers. Attend if you want to grapple with the gnarly problems of building systems for humans. We'll borrow ideas from Design Justice and the disability rights movements: nothing about us without us. Let's ask and answer better questions. You'll leave with sharper mental models and tools for the next tricky conversation at work.\r\n\r\n**Outline (30 minutes):**\r\nThe problem (10 min): \r\n- Fairness as experience, not state. \r\n- Why optimisation fails. \r\n- The individual vs the aggregate. \r\n- Why treating people as fungible is structurally unfair.\r\n\r\nThe critique (10 min): \r\n- Off-the-shelf fairness evals as fairness washing. \r\n- The temptation to install a library and call it done. \r\n- What these tools can and cannot catch without further analysis.\r\n\r\nThe alternative (10 min)\r\n- Borrowing from Design Justice and disability rights. \r\n- Exploring failure modes rather than optimising metrics. \r\n- Harm reduction over false perfection. \r\n- Transparency, explanation, empowerment.\r\n\r\n**What you'll take home**\r\nYou'll leave with sharper mental models for thinking about fairness in technical systems, frameworks borrowed from Design Justice and disability rights movements, and tools for the next tricky conversation at work about what fairness actually means. There are no easy answers here, but there are better questions.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NYBL9E", "name": "Laura Summers", "avatar": "https://pretalx.com/media/avatars/NYBL9E_minFyG1.webp", "biography": "Laura is a very technical designer\u2122\ufe0f, working at Pydantic as Lead Design Engineer. Her side projects include Sweet Summer Child Score (summerchild.dev) and Ethics Litmus Tests (ethical-litmus.site). 
Laura is passionate about feminism, digital rights and designing for privacy. She speaks, writes and runs workshops at the intersection of design and technology.", "public_name": "Laura Summers", "guid": "df9768b0-6085-5a20-942c-ec81c2c91343", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NYBL9E/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/S7KYEE/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/S7KYEE/", "attachments": []}, {"guid": "2ef09c9d-823b-5bb6-b886-e65d8cfacd4d", "code": "RNT9FV", "id": 88330, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-88330-pytorch-and-cpu-gpu-synchronizations", "url": "https://pretalx.com/pyconde-pydata-2026/talk/RNT9FV/", "title": "PyTorch and CPU-GPU Synchronizations", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "CPU\u2013GPU synchronizations are a subtle performance killer in PyTorch: they block the host, prevent the CPU from running ahead, and create GPU idle gaps. This talk explains what host-device synchronization is, how it\u2019s triggered by subtle code patterns (dynamic-shapes), and how to diagnose it with NVIDIA Nsight Systems by correlating utilization gaps with long CUDA API calls. We\u2019ll end with practical mitigation patterns, including unit testing for syncs via `torch.cuda.set_sync_debug_mode()` and when a small Triton kernel can help avoid syncs and fuse ops.", "description": "PyTorch gets its speed from asynchronous execution: the CPU launches operations quickly while the GPU executes them later. CPU\u2013GPU (host-device) synchronizations break this pipeline by blocking the host until the GPU reaches a specific point. 
The result is often counterintuitive: even if kernels are fast, the GPU develops idle gaps, throughput drops, and latency rises because the CPU can no longer run ahead and keep the GPU fed with work.\r\n\r\nThis talk builds intuition with a minimal loop that alternates a slow GPU operation with a quick \u201cbookkeeping\u201d operation, a pattern that resembles many inference and training pipelines. By adding a seemingly harmless action\u2014such as printing a CUDA tensor\u2014we\u2019ll see how easily a synchronization can be introduced and why the slowdown can be disproportionate to what the code appears to do.\r\n\r\nWe\u2019ll then walk through a practical profiling workflow in NVIDIA Nsight Systems. The key technique is to correlate GPU utilization gaps with long CPU-side CUDA API calls (for example cudaStreamSynchronize) that indicate the host thread is waiting. Comparing a healthy trace to a sync-heavy trace makes it clear where the pipeline stalls and which code region triggers it.\r\n\r\nBeyond the usual suspects (.item(), printing device tensors, explicit device transfers), the talk highlights dynamic shapes as a common synchronization trigger. Patterns like boolean indexing with a GPU mask or slicing with a GPU-resident index can force PyTorch to fetch information back to the CPU to determine output sizes and allocations. We\u2019ll discuss how to recognize these cases and how to restructure code toward shape-stable alternatives when possible.\r\n\r\nFinally, we\u2019ll cover how to prevent regressions. Instead of relying on profiling alone, we\u2019ll use PyTorch\u2019s experimental API `torch.cuda.set_sync_debug_mode()` in unit tests to surface synchronizations early, while keeping production code unchanged. 
We\u2019ll close with guidance on when a small Triton kernel is worth considering to avoid sync-inducing patterns and to fuse multiple small ops into a single, fully asynchronous kernel.", "recording_license": "", "do_not_record": false, "persons": [{"code": "UX7XPD", "name": "Tomas Ruiz", "avatar": "https://pretalx.com/media/avatars/UX7XPD_CrEadMF.webp", "biography": "I am a research assistant at the Ludwig-Maximilian-University of Munich within Prof. Schwemmer\u2019s Computational Social Science Lab. My research area is the intersection of Machine Learning and Social Media, particularly on multi-modal understanding. In previous jobs, I have worked as a software engineer in different corporations (Amazon, Allianz, BMW) and Startups. The projects ranged from optimization algorithms to backend-engineering.", "public_name": "Tomas Ruiz", "guid": "a9bbfd5f-8d84-502c-b1f0-a1392751de26", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UX7XPD/"}], "links": [{"title": "Blog Post", "url": "https://tomasruizt.github.io/posts/08_cpu_gpu_synchronization/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/RNT9FV/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/RNT9FV/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/RNT9FV/resources/PyTorc_ZaqUSDp.pdf", "type": "related"}]}, {"guid": "5b838c7f-5afd-527b-988f-976ed4a7662c", "code": "HP7DLX", "id": 88454, "logo": null, "date": "2026-04-15T15:00:00+02:00", "start": "15:00", "duration": "00:45", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-88454-beyond-kafka-and-s3-python-data-pipelines-with-http-native-bytestreams", "url": "https://pretalx.com/pyconde-pydata-2026/talk/HP7DLX/", "title": "Beyond Kafka and S3: Python Data Pipelines with HTTP-Native Bytestreams", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk (long)", "language": "en", "abstract": "Real-time bytestreams between 
systems in different organizations or secured environments, whether for batch dataset delivery or continuous streaming, are surprisingly hard. Traditional solutions fall short: message brokers like Kafka use discrete messages, file storage like S3 works for batch exchange but lacks streaming and coordination, while HTTP client-server approaches require one side to host and expose server endpoints, introducing security and operational overhead.\r\n\r\nThis talk introduces the ZebraStream Protocol: an open, HTTP-based bytestream protocol with coordination mechanisms that let you stream data \u2014 Parquet files, compressed archives, encrypted content\u2014directly between decoupled systems using Python's file-like interface. No message framing, no server hosting, no exposed endpoints.\r\n\r\nWe'll explore the design of a bytestream protocol for data sharing and integration that crosses the file-stream boundary, enabling seamless integration with pandas and any Python library expecting file-like objects, supporting use cases from ETL pipelines to IoT data delivery, cross-org collaboration to home network automation.", "description": "**TL;DR** *Streaming data between systems \u2014 whether across organizations, from secured environments, isolated networks, or even home setups \u2014 remains a common challenge in modern data engineering and data sharing workflows. This talk introduces the ZebraStream Protocol: an open, HTTP-based bytestream protocol designed specifically for decoupled systems, where both sides act as clients \u2014 no server hosting, no exposed endpoints.*\r\n\r\n### Talk Outline (45 minutes)\r\n\r\n**Opening \u2014 The Shape of the Solution (3 min)**\r\n\r\nThe talk opens with a UNIX pipe: opaque, minimal, composable. Any program that reads from stdin and writes to stdout already fits \u2014 no negotiation, no shared infrastructure. 
Two real-world use cases introduce the challenge: a supplier pushing inventory to a buyer's pipeline, and a hospital sharing trial data with a contract research organization. The question the talk sets out to answer: can the pipe's properties work across organizational boundaries, over HTTP?\r\n\r\n**Part 1 \u2014 Why the Problem Is Hard (8 min)**\r\n\r\nSharing data across organizational boundaries requires sharing infrastructure, trust, protocol, and format. Every crossing is a negotiation, and the cost is ongoing. The coupling spectrum \u2014 from function calls to cross-org transfers \u2014 sets up a precise vocabulary for what \"strong decoupling\" actually means. A well-composed protocol owns only transport and access, leaving structure and format to the caller.\r\n\r\n**Part 2 \u2014 What Already Exists (4 min)**\r\n\r\nKafka, S3, and HTTP APIs each fail at strong decoupling in a specific and diagnosable way. Kafka requires the other side to adopt a platform. S3 is a storage abstraction, not a transfer abstraction \u2014 no presence signal, no cleanup. An HTTP API permanently makes one side a server. Reading each failure as a requirement, a named pipe already satisfies all three \u2014 within a machine. The open question: can this work over HTTP?\r\n\r\n**Part 3 \u2014 The ZebraStream Protocol (5 min)**\r\n\r\nThe basic protocol and its Data API are revealed: a bytestream channel over HTTP where both sides are clients. A stateless relay sits in the middle \u2014 exclusive channel, HTTPS outbound only, separate read and write tokens. The difference between a message and a bytestream is made precise: no opinions on size, structure, or format. A raw HTTP example using `requests` shows the Data API in full \u2014 producer streams a generator over PUT, consumer reads a streaming GET response.\r\n\r\n**Part 4 \u2014 Presence and Coordination (5 min)**\r\n\r\nHTTP connects immediately, without knowing whether the other side is there. 
Two failure modes show the consequence: a consumer holding a silent GET with no way to tell if the producer is slow or absent; a producer writing into a PUT with no signal that nobody is reading. The Connect API resolves this with an explicit waiting room \u2014 the first client waits, the second triggers the transfer. Push and pull are runtime choices, not architectural ones: whoever arrives first waits.\r\n\r\n**Demo 1 \u2014 Push and Pull** (3 min): the supplier/buyer inventory use case, both modes shown live; the rendezvous is the point.\r\n\r\n**Part 5 \u2014 Python Integration (8 min)**\r\n\r\n`zebrastream-io` implements `io.IOBase`. Any library that accepts a file \u2014 pandas, loguru, tarfile, csv, pickle \u2014 works immediately, with no changes to existing code. Because there is no intermediate file, the producer's write and the consumer's read are the same operation: an early disconnect on either side raises immediately. No silent failures, no orphaned files, no copy cascades.\r\n\r\n**Demo 2 \u2014 Log Streaming** (5 min, notebook): two lines added to a loguru producer; the consumer is the ZebraStream CLI. The application logs normally \u2014 transport is invisible.\r\n\r\n**Part 6 \u2014 Design Decisions and Security (5 min)**\r\n\r\nThree deliberate choices \u2014 HTTP, bytestream, stateless relay \u2014 are named alongside what each costs. The security model follows from the relay design: TLS and scoped tokens require trusting the relay; end-to-end encryption does not. The relay moves ciphertext and has no key. Per-chunk encryption keeps live streams encrypted without buffering the full payload. The hospital/CRO use case from the opening gets its resolution: pull mode, on-demand EHR query, one extra argument \u2014 the relay operator sees nothing.\r\n\r\n**Closing \u2014 Open Protocol (1 min)**\r\n\r\nThe protocol specification is open and community-focused. The Python client is open source. ZebraStream.io is the managed relay and protocol sponsor. 
The talk closes where it opened: opaque, minimal, composable \u2014 across organizational boundaries.\r\n\r\n**Q&A (5\u201310 min)**", "recording_license": "", "do_not_record": false, "persons": [{"code": "ZZWUGV", "name": "Johannes Dr\u00f6ge", "avatar": "https://pretalx.com/media/avatars/ZZWUGV_qbT3MLS.webp", "biography": "Johannes holds a PhD in computer science, has developed open-source software, algorithms and statistic methods for genome data analysis, worked as a data scientist, and led a group of data engineers in a mid-size startup. He is currently bootstrapping SaaS infrastructure software projects with a focus on cross-organizational data sharing.", "public_name": "Johannes Dr\u00f6ge", "guid": "fcc65745-5f10-5cf0-87f3-dfb5f4f5ba3e", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/ZZWUGV/"}], "links": [{"title": "GitHub Project", "url": "https://github.com/zebrastream/", "type": "related"}, {"title": "zebrastream-io Python Package", "url": "https://pypi.org/project/zebrastream-io/", "type": "related"}, {"title": "Cloud Relay", "url": "https://www.zebrastream.io/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/HP7DLX/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/HP7DLX/", "attachments": [{"title": "Online Slides", "url": "/media/pyconde-pydata-2026/submissions/HP7DLX/resources/slides_L2won98.pdf", "type": "related"}]}, {"guid": "8efbc0d5-b237-531b-a333-f07698b9560e", "code": "EL7X8C", "id": 85305, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-85305-hierarchical-models-in-mmm-can-structure-beat-data-size", "url": "https://pretalx.com/pyconde-pydata-2026/talk/EL7X8C/", "title": "Hierarchical Models in MMM: Can Structure beat data size?", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "In every marketing 
project, teams strive to find more data, a longer timeframe, and more detailed splits, just to fix noisy channel attribution.\r\n\r\nBut what if structure played a bigger role than size and volume? \r\nIn this talk, we try to prove this. Using a simple toolkit like ArviZ and PyMC, we show you a simple hierarchical mix model, and how, by applying partial pooling, we can stabilize important KPIs like ROAS estimates across sparse channels - without the need for more data.\r\nWe will go through the code, transformation, and the real-life practices that allow us to get as close to the truth, to be able to have a meaningful impact in the marketing world.\r\nThe approach will be centered around marketing mix models, different transformations, and how useful it will be for the business.", "description": "## What we are going to show\r\n\r\n- Country-specific marketing data that is, unfortunately, never good.\r\n- Functions and Python transforms like Adstock and saturation (with the tests so that you can see it in action).\r\n- Differentiation between pooled, unpooled, and partial pooling.\r\n- Meaningful diagnostics.\r\n- Wins and losses of hierarchical modeling.\r\n\r\n## Why this is interesting and relevant\r\n\r\nHow do you model marketing effectiveness when you only have 12 months of data per country, some channels are interrupted for weeks, and your manager wants reliable ROAS estimates yesterday?\r\nMost teams think: \"We need more data.\" But getting more data takes time, costs money, and sometimes isn't even possible (or the quality is bad).\r\n\r\nWhat if you could get better estimates by changing how you model the problem?\r\nThis is where hierarchical modeling and partial pooling come in. Instead of treating each market as separate (unpooled) or pretending they're all identical (pooled), we let markets share information through partial pooling. Countries with thin data borrow strength from the group, while markets with strong signals pull away from the mean. 
You get stability where you need it and flexibility where the data supports it. We show this end-to-end in Python: from building testable transform functions (Adstock, saturation curves, lag effects) to assembling three different model architectures in PyMC, to evaluating which one gives you calibrated intervals and stable ROAS estimates. You'll see the good, the bad, and the ugly.\r\n\r\n## Main challenges\r\n- Making transforms reusable and testable: Marketing transformations like adstock and saturation are usually hidden in modeling code. It is generally very difficult to imagine how they look, how they change the data. We pull them out as pure Python functions with clear signatures, unit tests (pytest), and property-based checks (hypothesis). This makes them composable, debuggable, and easy to understand and even improve.\r\n- Building fair model comparisons: We construct pooled, unpooled, and hierarchical models with identical priors where appropriate so the comparison isolates the effect of structure, not prior choice. We walk through the PyMC code, show how partial pooling works mathematically, and run short MCMC chains that still demonstrate the key differences.\r\nWe go beyond \"we reached 90% R2\" to actual decision metrics:\r\n     * Posterior predictive checks: Does the model generate realistic data?\r\n     * ROAS stability: how much do channel estimates vary across groups?\r\n\r\nWe use ArviZ throughout to visualize traces, compare models, and compute these metrics. 
You'll see exactly when hierarchical structure pays off and when it doesn't.\r\n\r\n## Practical lessons and the repo\r\n\r\n**We share what we learned building this:**\r\n- Data checks and control using Pydantic, so you catch errors before MCMC runs for hours\r\n- Test your transforms independently: Yes, for unit tests!\r\n- Use synthetic data with known ground truth to validate the whole pipeline\r\n- Calibration metrics matter more than posterior predictive RMSE alone\r\n\r\n**The repo will include:**\r\n- Typed transform functions (Adstock, saturation, lag) with unit tests\r\n- Three PyMC models with matching priors\r\n- ArviZ evaluation scripts (calibration, PPC)\r\n- A Typer CLI to run everything on a predefined CSV\r\n\r\n**When hierarchical models lose (and what to do about it):**\r\nPartial pooling isn't magic. If your groups are genuinely wildly different and you have almost no data per group, hierarchical models can still produce overconfident nonsense. We show a scenario where this happens and discuss alternatives: stronger priors, splitting the hierarchy, or just admitting you don't have enough signal.\r\nThe takeaway: structure beats volume in the right conditions. We help you recognize those conditions and build models that respect them.", "recording_license": "", "do_not_record": false, "persons": [{"code": "RJBMTK", "name": "Mohamed Amine Jebari", "avatar": "https://pretalx.com/media/avatars/RJBMTK_44JDi8l.webp", "biography": "Mohamed Amine Jebari is a Lead Data Scientist based in Berlin, specializing in large-scale machine learning systems, Marketing Mix Modeling, and applied NLP. With extensive hands-on experience in Python and the scientific ecosystem, including pandas, NumPy, scikit-learn, PyMC, transformers, and Hugging Face. 
Amine builds end-to-end solutions that bridge rigorous statistical modeling with modern LLM-driven workflows.\r\n\r\nWorking at a data-driven consultancy, he leads a team of data scientists while remaining deeply involved in technical development, from Bayesian modeling to production-grade pipelines on AWS. Their work often focuses on solving real-world business problems with interpretable, high-impact models.\r\nCurious to uncover the truth and being a big fan of puzzles, he is now heavily working on causal inference and marketing mix models, pulling one inch at a time, closer to the truth.", "public_name": "Mohamed Amine Jebari", "guid": "226df2a0-09b2-5c4d-bdec-88d2d267800b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/RJBMTK/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/EL7X8C/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/EL7X8C/", "attachments": []}, {"guid": "0a9546aa-9363-54bf-93e5-96fba3aa3e56", "code": "B8GQ9Z", "id": 87706, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87706-metashade-compilerless-immediate-mode-shader-generation-in-pure-python", "url": "https://pretalx.com/pyconde-pydata-2026/talk/B8GQ9Z/", "title": "Metashade: Compilerless Immediate-Mode Shader Generation in Pure Python", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "Discover how to build a GPU shader generator in pure Python, without having to write a compiler.\r\n\r\nWe start by discussing how Pythonic embedded domain-specific languages (EDSLs) can help address the common challenges of shader programming.\r\n\r\nWe then examine the architectural decisions shared by popular frameworks like Warp and Taichi and outline their limitations. 
In particular, their reliance on introspection means supporting only a subset of Python - a language within a language - while compiler-like backends necessitate complex implementations in languages like C++.\r\n\r\nThe talk introduces an alternative architecture making it possible to overcome these limitations. Instead of introspection, we capture the program's logic by tracing execution with proxy objects at Python runtime, similar to JAX and PyTorch. Instead of building an IR, we emit target code eagerly, line-by-line, similar to how PyTorch Eager Mode launches computations. And because we don't implement a compiler, the implementation remains 100% Python.\r\n\r\nAttendees will leave with a toolbox of Python metaprogramming patterns empowering them to write a code generator in Python without having to implement a compiler.", "description": "The area of shader programming offers many tough problems to solve. The range of target platforms is vast: from CPU path-tracers to mobile GPUs - served by a zoo of incompatible languages: from GLSL to HLSL, from OSL to WGSL.\r\n\r\n Common challenges include portability, managing specializations, and a lack of abstraction mechanisms. The solutions for these include the archaic C Preprocessor, templates/generics, visual graph frameworks, transpilers and, finally, embedded domain-specific languages (EDSLs).\r\n\r\nPython is an ideal host for Embedded Domain-Specific Languages (EDSLs). Warp, Taichi, Numba, and Triton evolved to target GPU compute. All of them share common architectural decisions. They capture the program's logic by inspecting the Python source code, generate an internal representation and compile that IR to the target format.\r\n\r\nThe above approach comes with significant disadvantages. 
Only a subset of Python is supported, debugging with standard tools is impossible, integration with external Python code is limited, metaprogramming requires special syntax, and heavy compiler infrastructure needs to be implemented in a language like C++.\r\n\r\nThis talk proposes an alternative architecture. Instead of introspection, we capture the program's logic by tracing execution with proxy objects at Python runtime, similar to JAX and PyTorch. Instead of building an IR, we emit target code eagerly, line-by-line, similar to how PyTorch Eager Mode launches computations. And because we don't implement a compiler, the implementation remains 100% Python.\r\n\r\nWe discuss in detail how core elements of Python syntax can be overloaded to implement such an architecture:\r\n* Operator overloading to capture expressions.\r\n* Context managers to simulate C-like scopes.\r\n* `__setattr__`/`__getattr__` to capture variable names.\r\n* Function decorators to capture function signatures.\r\n\r\nAttendees will leave with a toolbox of Python metaprogramming patterns empowering them to write a code generator in Python without having to implement a compiler.", "recording_license": "", "do_not_record": false, "persons": [{"code": "KFMB83", "name": "Pavlo Penenko", "avatar": "https://pretalx.com/media/avatars/KFMB83_DeqhpoP.webp", "biography": "I was born and raised in Kyiv, Ukraine.\r\n\r\nIn 2000, I received a BSc in Computer Science from Taras Shevchenko National University of Kyiv.\r\n\r\nI started my career in game dev in Kyiv in the early 2000s, and continued it in Canada, moving to Vancouver in 2008 to render zombies at Capcom.\r\n\r\nLater, I worked on VR at AMD, content pipelines at Toonbox, Houdini Engine at SideFX, the Maya viewport at Autodesk and Redshift RT at Maxon.\r\n\r\nCurrently, I'm Principal Software Developer at Autodesk, working on material and shading workflows in MaterialX and Hydra applications.", "public_name": "Pavlo Penenko", "guid": 
"2490152a-004a-5c91-870b-ac39a99694da", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/KFMB83/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/B8GQ9Z/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/B8GQ9Z/", "attachments": [{"title": "Metashade Slides", "url": "/media/pyconde-pydata-2026/submissions/B8GQ9Z/resources/metash_YD2nuIO.pdf", "type": "related"}]}, {"guid": "3b02c59c-1f54-5058-afe3-678d2d73644c", "code": "HRFYVS", "id": 87788, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87788-ai-is-changing-the-game-building-modular-ai-ready-platforms-on-top-of-legacy-systems", "url": "https://pretalx.com/pyconde-pydata-2026/talk/HRFYVS/", "title": "AI Is Changing the Game: Building Modular, AI-Ready Platforms on Top of Legacy Systems", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "AI is fundamentally changing how quickly business and domain teams can create new logic, validations, and insights. In regulated environments, this new speed collides head-on with legacy systems, monolithic architectures and IT landscapes that were never designed for continuous AI-driven change.\r\n\r\nThis talk presents an open, Python-based platform architecture that turns AI-driven pressure into an architectural advantage. Instead of embedding AI into existing monoliths, the platform introduces a central control layer that orchestrates independent, stateless apps\u2014ranging from classical algorithms to AI agents\u2014without binding them to specific infrastructure or legacy constraints.\r\n\r\nThe control layer, implemented using Python and optionally Django, provides workflow orchestration, security, tenant management, and self-service registration of new components. 
This allows domain teams to deploy AI agents\u2014such as anomaly detection for regulatory reporting\u2014within days, while IT retains governance, auditability, and operational stability.\r\n\r\nThe talk argues that AI will amplify architectural weaknesses\u2014and shows why modular orchestration layers will become essential for AI-ready systems far beyond finance.", "description": "# AI Is Changing the Game: Building Modular, AI-Ready Platforms on Top of Legacy Systems\r\n\r\nAI is no longer a future topic\u2014it is actively reshaping expectations inside organizations. Domain and business teams can now prototype new rules, validations, and analytical logic themselves, often within days. While this accelerates innovation, it puts enormous pressure on existing IT architectures, especially in environments dominated by legacy systems and monolithic platforms.\r\n\r\nThis talk explores how software architecture must evolve to absorb this pressure instead of breaking under it.\r\n\r\nRather than embedding AI capabilities directly into legacy systems, the presented approach introduces a modular, AI-ready platform built around independent, stateless apps orchestrated by a central control layer. These apps can represent classical reporting logic, risk calculations, or AI agents, all treated as first-class architectural components.\r\n\r\nThe talk is highly relevant for the **PyCon track \u201cProgramming, Software Engineering & Testing\u201d**, because it demonstrates how to design, orchestrate, and integrate AI-driven workflows in complex Python-based platforms. The central control layer, implemented using Python and optionally Django, provides workflow orchestration, security, tenant management, and self-service registration of new components. 
This allows domain teams to deploy AI agents or agents written with the help of AI within days, while IT retains governance, auditability, and operational stability.\r\n\r\nBy showing how AI-driven pressure can be turned into an architectural advantage, the talk provides patterns and practical lessons that apply far beyond finance, making it relevant for any domain dealing with legacy systems, modular design, and AI integration.\r\n\r\n\r\n## Architectural Concepts Covered\r\n\r\nThe talk introduces the key architectural principles behind the platform:\r\n\r\n- **Independent, stateless apps** that declare their data needs and outputs but remain unaware of infrastructure, environments, or other apps  \r\n- **Strict separation of concerns** between domain logic, orchestration, persistence, and presentation  \r\n- **Technology-indifferent design**, allowing apps to run on different databases, reporting tools, or compute backends  \r\n- **Parallel and distributed execution** as a default, not an optimization  \r\n\r\nThis architecture allows legacy systems to coexist with modern components instead of blocking innovation.\r\n\r\n## The Control Layer as an Enabler for AI\r\n\r\nA central part of the talk is the control layer that orchestrates all components. Implemented using Python and optionally Django, this layer is responsible for:\r\n\r\n- workflow orchestration and dependency management  \r\n- authentication, authorization, and tenant isolation  \r\n- self-service registration of apps and AI agents  \r\n- resource allocation, monitoring, and auditability  \r\n\r\nDjango is not used as a traditional CRUD backend, but as governance infrastructure: providing APIs, admin and self-service portals, and security mechanisms that allow fast innovation without losing control.\r\n\r\n## Example: Integrating an AI Agent into a Regulated Platform\r\n\r\nA concrete example demonstrates the architecture in action: integrating an AI agent for e.g. 
anomaly detection in regulatory reporting.\r\n\r\nThe example walks through:\r\n\r\n- developing the agent as an independent, containerized app  \r\n- registering it via standardized APIs  \r\n- declaring required data and produced results  \r\n- orchestrating it within existing workflows  \r\n- testing, monitoring, and scaling it without touching legacy systems  \r\n\r\nThis shows how new AI capabilities can be deployed within days while maintaining stability and compliance.\r\n\r\n## Why This Matters Beyond Finance\r\n\r\nWhile the example comes from regulatory reporting, the patterns discussed apply to many domains facing similar challenges: data-heavy systems, long-lived platforms, and increasing pressure to integrate AI safely.\r\n\r\nThe talk concludes with lessons learned and architectural patterns that help future-proof systems as AI continues to raise the bar for flexibility, speed, and modularity.", "recording_license": "", "do_not_record": false, "persons": [{"code": "FPMRS9", "name": "Werner Gothein", "avatar": "https://pretalx.com/media/avatars/FPMRS9_BTTybVZ.webp", "biography": "Experienced software architect and risk management expert with a focus on AI-ready, modular platform design. 
Over 25 years in developing and integrating financial systems, orchestrating complex workflows, and enabling rapid AI deployment while maintaining governance and stability.", "public_name": "Werner Gothein", "guid": "8b8897f0-02cc-598b-9c9c-06ebda2c1f2d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FPMRS9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/HRFYVS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/HRFYVS/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/HRFYVS/resources/202604_mUpxL1y.pdf", "type": "related"}]}], "Ferrum [2nd Floor]": [{"guid": "c0a37479-0cca-5428-9a28-d8d2f0383773", "code": "DDVW3W", "id": 88425, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "01:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-88425-getting-career-clarity-in-uncertain-times", "url": "https://pretalx.com/pyconde-pydata-2026/talk/DDVW3W/", "title": "Getting Career Clarity in Uncertain Times", "subtitle": "", "track": "General: Education, Career & Life", "type": "Tutorial", "language": "en", "abstract": "Feeling unsure about your next step in your career?\r\n\r\nThe data & AI field is evolving faster than ever. New tools, new roles, and constant \u201cnext big things\u201d can make even experienced professionals feel unsure about where they are heading, and how to make intentional career decisions in the middle of all this change.\r\n\r\nYou might be doing well, feeling comfortable. Interesting work, steady progress, recognition.\r\nAnd still, there\u2019s that question in the background: Where is this actually going?\r\n\r\nThis interactive workshop helps you explore different future paths, understand trade-offs, and gain clarity about what kind of work and influence you want next.", "description": "The data & AI field is evolving faster than ever. 
New tools, new roles, and constant \u201cnext big things\u201d can make even experienced professionals feel unsure about where they are heading, and how to make intentional career decisions in the middle of all this change.\r\n\r\nYou might be doing well, feeling comfortable. Interesting work, steady progress, recognition.\r\nAnd still, there\u2019s that question in the background: Where is this actually going?\r\n\r\nA lot of career advice in tech assumes there is a clear path to follow. In reality, most data & AI careers don\u2019t work that way. Roles shift, organisations change, and what used to feel like a logical next step often isn\u2019t anymore.\r\n\r\nIn this workshop, we\u2019ll slow things down and focus on direction rather than decisions. The goal is not to figure out \u201cthe next job\u201d, but to get clearer on the kind of work you want to do.\r\n\r\nThis is a practical, hands-on session.\r\n\r\nWe\u2019ll use exercises such as odyssey planning to explore a few possible future paths you could take from here. 
\r\n\r\nYou\u2019ll work through:\r\n    \u2022    Different ways your career could evolve\r\n    \u2022    The trade-offs each direction comes with\r\n    \u2022    What feels worth exploring further, and what doesn\u2019t\r\n\r\nAnd, just as importantly, how you feel about these plausible futures.\r\n\r\nYou\u2019ll leave with a clearer sense of what matters to you now, and a stronger sense of direction\r\n\r\nBy the end of the session, you will:\r\n    \u2022    Have more clarity about the kind of work and influence you want going forward\r\n    \u2022    See more than one possible future, instead of feeling stuck with a single \u201cright\u201d option\r\n    \u2022    Feel more confident navigating uncertainty without rushing into decisions\r\n\r\nWho this session is for\r\n    \u2022    Data & AI professionals with a few years of experience or more\r\n    \u2022    People who feel \u201cin between\u201d stages, roles, or directions\r\n    \u2022    Individual contributors and leaders alike\r\n    \u2022    Anyone who wants more intentionality in their work", "recording_license": "", "do_not_record": false, "persons": [{"code": "NMACLQ", "name": "Tereza Iofciu", "avatar": "https://pretalx.com/media/avatars/NMACLQ_7YAZwK6.webp", "biography": "Tereza Iofciu is a data and AI expert, leadership coach, and PSF Fellow with 15+ years of experience leading data and product teams at neuefische, FREE NOW, and New Work (XING). 
She helps professionals lead and adapt in the age of AI through her Data Diplomat Framework\u2122, bridging technical depth with human leadership.", "public_name": "Tereza Iofciu", "guid": "9f1c4db3-3e40-5e40-a06d-ad540d3a75fc", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NMACLQ/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/DDVW3W/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/DDVW3W/", "attachments": []}, {"guid": "de1c3707-29b9-5a6e-98a0-1def67a1aa49", "code": "N8QVT8", "id": 87233, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "01:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87233-accelerate-fastapi-development-with-openapi-generator", "url": "https://pretalx.com/pyconde-pydata-2026/talk/N8QVT8/", "title": "Accelerate FastAPI Development with OpenAPI Generator", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Tutorial", "language": "en", "abstract": "Develop FastAPI applications faster with the contract-first approach using the OpenAPI Generator, no GenAI required. \r\n\r\n**To attend this workshop, please install the openapi generator.** \r\nFor details, please visit the README.md of https://gitlab.com/Eeffee/pycon26\r\n\r\nMachine learning models are often deployed as APIs, but the \"agreement\" between the consumer and the service is often fragile. How does the consuming app know if a parameter is optional or required? When the code diverges from the documentation, integration breaks.\r\nIn this tutorial you will learn to define an API contract using OpenAPI specification. We will use the OpenAPI Generator to automatically generate API endpoints and strictly typed Pydantic data models. 
Following this approach for all applications supports standardization, consistency, and maintainability across all projects.\r\n\r\nThe session will cover three key areas:\r\n**Design**: We will define an OpenAPI specification as our single source of truth for the API and end consumer.\r\n**Generate**: We will use the OpenAPI Generator to create a FastAPI skeleton and show possibilities for customization to fit specific project needs.\r\n**Implement**: We will connect our generated app to a ML model where we will create Mystic Creatures for Real Life Problems", "description": "Machine learning models are often deployed as APIs, where we have an endpoint that generates predictions given some input. For example, we can send a POST request specifying a color, a length, and a number of legs, and the endpoint predicts the best fitting animal. The description of the endpoint, the schema of the request, and the response acts as a form agreement between the consumer and the service. In practice, the restrictions on the API are not well defined. How does the consuming app know if a parameter is optional or required? \r\nIn this tutorial you will learn to define an API contract as an OpenAPI specification (OAS). OAS is a standardized description of the API endpoints and data models. We will demonstrate how to use the OpenAPI Generator to automatically generate the API endpoints and strictly typed Pydantic data models, by only designing the OAS in YAML format, without GenAI. OpenAPI Generator utilizes mustache templates to translate the specification into actual code. We will demonstrate use cases for customizing the template for specific needs of the resulting API stubs.  \r\nBy generating code from the contract, you ensure that the deployed application always reflects the agreed-upon specification. It automates the writing of repetitive code, such as Pydantic models and endpoint definitions, allowing developers to focus on the implementation logic. 
It enforces standard patterns and structures, ensuring consistency and maintainability across different projects.\r\n\r\nExpect fun mystic creatures after deploying the resulting API in your local environment.\r\n\r\n#### Target Audience\r\nEngineers and data scientists looking to standardize their FastAPI development workflow. We expect you to have basic knowledge in Python, virtualenv, Pydantic data models and FastAPI.\r\n\r\n**To attend this workshop, please install the openapi generator v7.20.** \r\nFor details, please visit the README.md of https://gitlab.com/Eeffee/pycon26\r\n\r\n#### Technical Setup\r\n* _Operating system:_ We recommend using Unix OS (Mac or Linux)\r\n* _Python:_ Version 3.10+\r\n* _OpenAPI Generator:_ Version 7.20\r\n   * Installation Guide: https://openapi-generator.tech/docs/installation/\r\n\r\nFor details, please visit the README.md of https://gitlab.com/Eeffee/pycon26\r\n\r\n#### Outline\r\n1. Introduction (10 min)\r\n* The philosophy of Contract-First development\r\n* Overview of the OpenAPI specification and Pydantic data models\r\n* Introduction to the OpenAPI generator tool\r\n2. Design (20 min)\r\n* Introduction to the unicorn service logic (Input: Real Life Problems,  Output: Mystic Creatures)\r\n* Definition of the openapi specification, focusing on the Request and Response schemas\r\n3. Generate (30 min)\r\n* Running the standard vanilla OpenAPI generator\r\n* Introduction to mustache templates\r\n* Customization of the default mustache to inject our specific dependencies\r\n4. Implementing (15 min)\r\n* We will connect the generated API stubs to a predict() function that calls our unicorn generation service.\r\n5. Demo & QA (15m)\r\n* Running the server via u**v**icorn and testing our u**n**icorn service endpoint using the Swagger UI.", "recording_license": "", "do_not_record": false, "persons": [{"code": "NUFKS8", "name": "Dr. 
Evelyne Groen", "avatar": "https://pretalx.com/media/avatars/NUFKS8_BOl5ZsM.webp", "biography": "I am a senior MLOps engineer at Malt. A long time ago I studied physics in Amsterdam, after which I moved to Berlin to discover the world of data science. Currently I'm working at Malt exploring the boundaries between devops and data.", "public_name": "Dr. Evelyne Groen", "guid": "690caf28-069b-5a2a-88bc-63c37026da03", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NUFKS8/"}, {"code": "93VSJ9", "name": "Kateryna Budzyak", "avatar": "https://pretalx.com/media/avatars/93VSJ9_uDSfhJH.webp", "biography": "Kat is a Senior Machine Learning Engineer at Malt, the freelancer marketplace, where she works in the relevancy and matching team. She has a background in bioinformatics and is passionate about beautiful code.", "public_name": "Kateryna Budzyak", "guid": "eab4f7ea-31f2-5129-bfa2-5bbbea551790", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/93VSJ9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/N8QVT8/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/N8QVT8/", "attachments": [{"title": "pycon26_Budzyak_Groen", "url": "/media/pyconde-pydata-2026/submissions/N8QVT8/resources/pycon__pYJMaAA.pdf", "type": "related"}]}, {"guid": "e54db58e-77d9-5fc6-9887-4aeb8824b134", "code": "N98BQT", "id": 87741, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87741-practical-refactoring-with-syntax-trees", "url": "https://pretalx.com/pyconde-pydata-2026/talk/N98BQT/", "title": "Practical Refactoring with Syntax Trees", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "The Python Abstract Syntax Tree powers tools like pytest, linters, and automatic refactoring. 
\r\nIn this talk, we'll approach syntax trees from first principles and see how Python code can be treated as structured data.\r\n\r\nWe'll then explore how syntax trees can be used to automate refactoring across large codebases. \r\nUsing a real-world example and the libCST library, we'll build a small refactoring tool and share practical advice for writing and applying automated refactorings.\r\n\r\nYou'll leave with a clear mental model of syntax trees and a solid starting point for writing your own refactoring tools.", "description": "Modern Python tooling relies heavily on syntax trees. In this talk, we take a practical look at Python's Abstract Syntax Tree (AST) and how Python code can be treated as structured data rather than plain text.\r\n\r\nWe'll start from first principles: how Python source code is parsed, what an AST represents, and how to reason about code as a tree. This builds a clear mental model that makes syntax-tree-based tooling easier to understand and work with.\r\n\r\nFrom there, we'll explore how syntax trees enable automated refactoring across large codebases using scripts to rewrite code (sometimes called codemods). \r\nUsing a realistic refactoring scenario, we'll implement a small refactoring tool using libCST.\r\n\r\nThe talk also shares practical tips from writing codemods. 
This includes how to use test-driven development when writing refactoring tools, where AI can help in refactoring tasks, and strategies for dealing with formatting.\r\n\r\nAttendees will leave with a solid understanding of how syntax trees work in Python and a concrete starting point for writing their own automated refactoring tools.\r\n\r\n\r\nOutline:\r\n\r\nMinutes 0-5: Primer on Python syntax trees and the AST mental model\r\nMinutes 5-12: From syntax trees to codemods and automated refactoring\r\nMinutes 12-22: Implementing a refactoring codemod with libCST\r\nMinutes 22-27: Test-driven codemods, formatting strategies, and AI assistance\r\nMinutes 27-30: Conclusion\r\n\r\n\r\nEDIT: \r\n- [Slides](https://ldirer.github.io/talk_pycon_de_2026_libcst/)\r\n- [Code for example](https://github.com/ldirer/codemod-rename-pytest-fixtures)", "recording_license": "", "do_not_record": false, "persons": [{"code": "WRXLGJ", "name": "Laurent Direr", "avatar": "https://pretalx.com/media/avatars/WRXLGJ_EjaUsMJ.webp", "biography": "I'm a freelance web developer helping small teams ship reliable software. I've been working with Python for 10+ years and enjoy automating work for other developers.\r\n\r\nThese days I'm very interested in local-first software technologies.\r\nI attended the Recurse Center (a programming retreat) in 2018. 
\r\n\r\n[GitHub profile](https://github.com/ldirer)\r\n[Blog](https://ldirer.com/blog)", "public_name": "Laurent Direr", "guid": "4dd2a82d-05ff-5638-bb54-e21a6063c0d9", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/WRXLGJ/"}], "links": [{"title": "Code for example", "url": "https://github.com/ldirer/codemod-rename-pytest-fixtures", "type": "related"}, {"title": "Slides", "url": "https://ldirer.github.io/talk_pycon_de_2026_libcst/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/N98BQT/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/N98BQT/", "attachments": []}, {"guid": "1b4f2dc5-9c04-50f6-b3bd-094acdba7e9a", "code": "R7TT3E", "id": 87782, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87782-simplifying-rag-document-pipelines-with-multimodal-embeddings", "url": "https://pretalx.com/pyconde-pydata-2026/talk/R7TT3E/", "title": "Simplifying RAG Document Pipelines with Multimodal Embeddings", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "In RAG-based systems, the main challenge is often not tuning the LLM itself, but making documents available in a form that can be retrieved reliably. In enterprise settings, the dominant input format is still PDF, ranging from text-heavy reports to slide decks, scanned documents, and visually dense presentations. \r\n\r\nTraditional document processing pipelines rely on OCR and layout analysis to extract text, followed by chunking and embedding. While this works well for text-heavy documents, much of the original structure is often lost\u2014especially for presentations, multi-column layouts, and visually driven content. 
Images, charts, and diagrams typically require separate processing, increasing pipeline complexity and fragility.\r\n\r\nRecent multi-modal embedding models enable a different approach: embedding entire PDF pages directly as images. This preserves layout, visual hierarchy, and embedded graphics in a single representation and significantly simplifies document ingestion. \r\n\r\nThis talk compares classical OCR-based document processing pipelines with multi-modal page embeddings, drawing on benchmarks conducted on real-world enterprise documents across different models. It highlights where this approach performs well, where its limitations lie, and how to design practical, cost-aware retrieval systems in Python.", "description": "This talk provides an overview of how document processing for RAG systems can be simplified using multimodal embeddings, grounded in benchmarks on real-world enterprise documents.\r\n\r\nWhat the talk covers\r\n\r\n1. **Motivation: Why RAG Is Still Hard**  \r\n   Why PDFs remain challenging in enterprise RAG systems, and where current document processing approaches break down\u2014especially for presentations and visually structured documents.\r\n\r\n2. **The Classical Approach: PDF \u2192 Text \u2192 Chunks**  \r\n   An overview of traditional OCR- and layout-based pipelines, including their strengths, typical failure modes, and why they tend to grow into complex and fragile systems over time.\r\n\r\n3. **A New Paradigm: Multimodal Page Embeddings**  \r\n   How embedding entire PDF pages as images changes the ingestion model, what information is preserved compared to text-only approaches, and what this means for retrieval quality and system simplicity.\r\n\r\n4. **Benchmark Setup**  \r\n   How the benchmark comparing classical pipelines and multimodal page embeddings was designed, using anonymized, real-world enterprise documents across multiple document types. 
Different models and vendors are referenced only as examples, not as the focus.\r\n\r\n5. **Results and Key Findings**  \r\n   Where multimodal page embeddings outperform text-based pipelines, where they do not, and how hybrid approaches can emerge as a practical solution.\r\n\r\n6. **Production Best Practices**  \r\n   Practical guidance for deploying these approaches in real systems, including index design, quality monitoring, cost control, and how to integrate multimodal retrieval cleanly into Python-based RAG architectures.\r\n\r\nAttendees will leave with a clear understanding of when multimodal embeddings are a strong replacement for classical PDF pipelines, and how to reason about the trade-offs involved.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LDZFAV", "name": "Arne Grobr\u00fcgge", "avatar": "https://pretalx.com/media/avatars/LDZFAV_iVgQuXd.webp", "biography": "Worked on multi-modal retrieval-augmented generation (RAG) and agentic LLM systems. Designed ingestion and retrieval pipelines across text, video, and structured data to integrate common knowledge platforms such as Microsoft SharePoint. Focused on scalable Azure-based infrastructure, multilingual and multimodal document processing, and continuous evaluation for reliability. 
Gathered experience in building browser-driven agents using modern orchestration frameworks and MCP integration.", "public_name": "Arne Grobr\u00fcgge", "guid": "b4f611aa-0c9b-5eb4-8d58-76b33f1e8f83", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LDZFAV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/R7TT3E/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/R7TT3E/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/R7TT3E/resources/rag_wi_NGHsRow.pdf", "type": "related"}]}, {"guid": "bc79c9be-0d48-5f93-9cd2-13197460bfcc", "code": "P8Y9TD", "id": 87644, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87644-the-day-the-agent-started-lying-politely", "url": "https://pretalx.com/pyconde-pydata-2026/talk/P8Y9TD/", "title": "The Day the Agent Started Lying (Politely)", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk", "language": "en", "abstract": "You deploy an agent to automatically route incoming customer support tickets. At first, it is a clear win: response times improve, customers are happier, and support teams finally get some rest.\r\n\r\nThen time passes.\r\n\r\nNothing crashes. Dashboards stay green. No alerts fire. Yet the agent\u2019s decisions slowly degrade\u2014first slightly, then inconsistently\u2014eventually becoming confidently wrong.\r\n\r\nThis is data drift.\r\n\r\nLLM-based agents in production operate in constantly changing environments. Products launch, outages happen, terminology evolves, and priorities shift. Unlike traditional ML models, LLMs can produce plausible, well-phrased outputs even when they are incorrect, making these failures difficult to detect.\r\n\r\nIn this talk, we focus on practical techniques for continuously evaluating and monitoring LLM-based agents after deployment. 
Using a support-ticket routing agent as an example, we examine drift signals such as increasing classification uncertainty, spikes in fallback categories, shifts in embedding distributions, and growing disagreement with historical or human decisions.\r\n\r\nThe emphasis is not on training or prompt tuning, but on operating agents safely over time: detecting silent failures early and knowing when intervention, retraining, or retirement is required before users notice.", "description": "In this talk, we will walk through a concrete production-style example of an LLM-based agent that automatically classifies and routes incoming customer support tickets. The agent takes raw ticket text as input, predicts a priority label, and routes the ticket to the appropriate support queue. A human override is possible but expected to be rare.\r\n\r\nAt deployment time, the system performs well. Classification confidence is high, fallback usage is low, and manual corrections are infrequent. Over time, however, the environment changes: new products are launched, outages introduce new failure modes, terminology evolves, and internal definitions of ticket priorities shift. 
Nothing crashes, latency remains stable, and traditional service-level metrics stay green; yet the agent\u2019s decisions slowly degrade.\r\n\r\nThis talk focuses on how to observe, measure, and act on that degradation.\r\n\r\nUsing recorded ticket data and a demo, I will show how to instrument an LLM-based agent with continuous evaluation signals, including:\r\n\r\n- Tracking class-probability entropy over time to detect increasing uncertainty\r\n- Monitoring the rate of \u201cunknown\u201d or fallback predictions as an early warning signal\r\n- Measuring embedding distribution drift between historical and recent tickets\r\n- Quantifying disagreement between current agent decisions and historical routing outcomes or human corrections\r\n\r\nI will demonstrate how these signals can be computed in rolling time windows, visualised on simple dashboards, and connected to alert thresholds. Rather than relying on a single accuracy number, the talk shows how multiple weak signals together reveal silent failure modes that would otherwise go unnoticed.\r\n\r\nThe focus is deliberately not on training new models or tuning prompts. Instead, we concentrate on operating LLM-based agents safely after deployment. You will see how to build a continuous evaluation pipeline, how to distinguish normal variation from meaningful drift, and how to decide when intervention is required whether that means retraining, prompt changes, label redefinition, or temporary rollback to human routing.\r\n\r\nBy the end of the talk, attendees will have a clear, practical blueprint for monitoring LLM-based agents in production and for detecting quiet, confident failure modes before they affect users or business operations.", "recording_license": "", "do_not_record": false, "persons": [{"code": "XG9398", "name": "Asya Melnik", "avatar": null, "biography": "I started as a data scientist, building ML microservices and deploying models into production. 
I later moved into a consulting role, where I helped adapt ML models to real customer needs, translate business problems into measurable objectives, interpret results, and monitor model performance over time.\r\n\r\nOver the years, my work gradually shifted towards GenAI. I now design and build AI agents from scratch for internal process optimisation, support colleagues in adopting GenAI and agentic AI responsibly, and promote security-aware practices in solution development. A large part of my work focuses on evaluating and monitoring agent behaviour in real environments to ensure these systems remain useful, safe, and trustworthy after deployment.", "public_name": "Asya Melnik", "guid": "2df8866d-ad5e-51b4-9809-794113a900c2", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/XG9398/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/P8Y9TD/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/P8Y9TD/", "attachments": [{"title": "presentation", "url": "/media/pyconde-pydata-2026/submissions/P8Y9TD/resources/pycon_U4M25KV.pptx", "type": "related"}]}], "Dynamicum [Ground Floor]": [{"guid": "61c0d8c3-0630-546d-90ef-45af0e5afa0d", "code": "VWCZXS", "id": 87652, "logo": null, "date": "2026-04-15T10:15:00+02:00", "start": "10:15", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-87652-process-analyze-and-transform-python-code-with-asts", "url": "https://pretalx.com/pyconde-pydata-2026/talk/VWCZXS/", "title": "Process, Analyze, and Transform Python Code with ASTs", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Tutorial", "language": "en", "abstract": "You\u2019ve likely used a tool like `black`, `flake8`, or `ruff` to lint or format your code, or a tool like `sphinx` to document it, but you probably do not know how they accomplish their tasks. These tools and many more use **Abstract Syntax Trees (ASTs)** to analyze and extract information from Python code. 
An AST is a representation of your code's structure that enables you to access and manipulate its different components, which is what makes it possible to automate tasks like code migrations, linting, and docstring extraction.\r\n\r\nIn this workshop, you\u2019ll learn how to use the Python standard library\u2019s `ast` module to parse and analyze code. Using just the standard library, we will implement a couple of common checks from scratch, which will give you an idea of how these tools work and help you build the skills and confidence to use ASTs in your own projects.", "description": "This tutorial will be a roughly 50/50 split of lecture and exercises. Attendees will get hands-on experience working with ASTs in Python, using only the standard library. By recreating common code-quality checks from scratch, attendees will both learn how common tools work under the hood and how to work with the AST in an easy-to-understand fashion.\r\n\r\nTopics covered:\r\n- Introduction to the term and concept of Abstract Syntax Trees (ASTs)\r\n- Some of the ways ASTs are used by Python itself and by popular tools\r\n- Parsing code into an AST and inspecting it\r\n- Walking the tree: `ast.iter_fields()`, `ast.iter_child_nodes()`, `ast.walk()`\r\n- Modifying the code before running it\r\n- Converting an AST into source code again with `ast.unparse()` and its caveats\r\n- Finding missing docstrings\r\n- `ast.NodeVisitor` and `ast.NodeTransformer`\r\n- `generic_visit()` method  \u2014 what it does and why we need it using animation\r\n- 4 exercise breaks spread throughout accounting for ~45 minutes", "recording_license": "", "do_not_record": false, "persons": [{"code": "9WJJPL", "name": "Stefanie Molin", "avatar": "https://pretalx.com/media/avatars/9WJJPL_CpF0joR.webp", "biography": "[Stefanie Molin](https://stefaniemolin.com) is a software engineer at Bloomberg in New York City, where she tackles tough problems in information security, particularly those revolving around data 
wrangling/visualization, building tools for gathering data, and knowledge sharing. She is also a core developer of [numpydoc](https://github.com/numpy/numpydoc) and the author of \u201c[Hands-On Data Analysis with Pandas: A Python data science handbook for data collection, wrangling, analysis, and visualization](https://www.amazon.com/Hands-Data-Analysis-Pandas-visualization/dp/1800563450),\u201d which is currently in its second edition and has been translated into Korean and Chinese. She holds a bachelor\u2019s of science degree in operations research from Columbia University's Fu Foundation School of Engineering and Applied Science, as well as a master\u2019s degree in computer science, with a specialization in machine learning, from Georgia Tech. In her free time, she enjoys traveling the world, inventing new recipes, and learning new languages spoken among both people and computers.", "public_name": "Stefanie Molin", "guid": "a8a67b83-2096-5586-949a-be3485dfad1e", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/9WJJPL/"}], "links": [{"title": "Setup instructions", "url": "https://github.com/stefmolin/ast-workshop?tab=readme-ov-file#setup-instructions", "type": "related"}, {"title": "Slides", "url": "https://stefaniemolin.com/ast-workshop/", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/VWCZXS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/VWCZXS/", "attachments": []}, {"guid": "4129014c-8ef9-59d5-bb56-6a53d5a547a0", "code": "AGYLTV", "id": 88428, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-88428-array-oriented-programming-in-python-libraries-techniques-and-trade-offs", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AGYLTV/", "title": "Array-Oriented Programming in Python: Libraries, Techniques, and Trade-offs", "subtitle": "", "track": "PyData: PyData & Scientific Libraries 
Stack", "type": "Tutorial", "language": "en", "abstract": "Python has become the dominant language for scientific computing and data science, largely due to powerful array libraries that enable high-performance numerical computation. This tutorial introduces array-oriented programming as a paradigm and surveys the modern Python array ecosystem.\r\n\r\nWe'll explore when and how to use different array libraries: NumPy for general-purpose array operations, JAX for automatic differentiation, just-in-time compilation of array-oriented code, and GPU acceleration, Numba for just-in-time compilation of imperative code, and Awkward Array for nested and irregular data structures. Through live demos, we'll show how to think in arrays, discuss the limitations of array-oriented programming, and demonstrate how JIT compilation addresses these challenges.\r\n\r\nWhether you're analyzing data, building machine learning models, or doing scientific simulations, understanding the strengths and trade-offs of each library will help you choose the right tool for your problem.", "description": "## Material\r\nhttps://github.com/ikrommyd/2026-04-15-pyconde-and-pydata-2026-tutorial-array-oriented-programming\r\nWhat you need: Your laptop, and the repository cloned and the environment set up as explained in the README. Alternatively, an internet connection during the tutorial to set up the environment live or follow along on MyBinder.\r\nThe setup is needed to do the problems/puzzles which are part of the tutorial.\r\n\r\n## Overview\r\n\r\nPython's dominance in scientific computing and data science stems from its powerful array libraries that enable high-performance numerical computation. 
This 90-minute tutorial introduces array-oriented programming as a paradigm and surveys the modern Python array ecosystem, helping you understand which tools to use and when.\r\n\r\n## What is Array-Oriented Programming?\r\n\r\nArray-oriented programming is a paradigm that separates problems into lightweight Python bookkeeping and heavy numerical computation handled by vectorized operations in fast, precompiled libraries. We'll demonstrate how this approach combines Python's ease of use with near-compiled-language performance.\r\n\r\nThrough live examples, you'll see how array operations can be orders of magnitude faster than explicit loops. This mindset shift\u2014thinking about operations on entire arrays rather than individual elements\u2014is fundamental to effective scientific Python programming.\r\n\r\n## The Array Library Landscape\r\n\r\nWe'll survey the modern Python array ecosystem and when to use each tool:\r\n\r\n- **NumPy**: The foundation for general-purpose array operations\r\n- **Numba & JAX**: JIT compilation approaches\u2014when and why to use each\r\n- **Awkward Array**: Handling nested and ragged data structures\r\n- **Large dataset tools**: Brief overview of Dask, Xarray, Zarr, and Blosc2 for distributed computing, labeled arrays, and compression\r\n\r\nWe'll demonstrate the strengths and limitations of each through live coding examples, showing trade-offs between different approaches.\r\n\r\n## Understanding Limitations and Trade-offs\r\n\r\nA critical part of choosing the right tool is understanding when array-oriented programming has limitations. We'll discuss challenges like intermediate array overhead and algorithms that don't naturally vectorize, and show how different libraries address these problems.\r\n\r\n## What You'll Learn\r\n\r\nBy the end of this tutorial, you will:\r\n\r\n1. **Understand array-oriented programming** as a paradigm and how it differs from imperative programming\r\n2. 
**Know which library to choose** for different problems: NumPy vs. Numba vs. JAX vs. specialized tools\r\n3. **Recognize when array-oriented approaches have limitations** and how to address them with JIT compilation\r\n4. **Handle non-rectilinear data** using libraries like Awkward Array\r\n5. **Work with large datasets** using chunking, compression, and labeled arrays\r\n6. **Write more performant Python code** by applying array-oriented thinking to your own problems\r\n\r\n## Prerequisites\r\n\r\nFamiliarity with Python (loops, functions, if statements) and basic NumPy exposure (what an array is and how to use it). No deep expertise required.\r\n\r\n## Target Audience\r\n\r\nData scientists, researchers, and engineers who want to write more efficient Python code, understand the modern array ecosystem, or choose the right tools for their problems.\r\n\r\n## Outline\r\n* **0:00\u20120:10 (10 min)** Lecture 1: Array-oriented programming and its benefits. Simple and complex (3 body problem) examples of imperative, functional, and array-oriented styles. Speed and memory advantages in Python. What the array-oriented paradigm emphasizes/is good for: interactive analyses of distributions. Path length as a worked example.\r\n* **0:10\u20120:25 (15 min)** NumPy puzzles and solutions. Alternating between hands-on puzzles and walkthrough of solutions: array slicing, consecutive differences, curve length, and image downscaling with reshape.\r\n* **0:25\u20120:35 (10 min)** Lecture 2: Disadvantages of array-oriented programming. (1) The problem of intermediate arrays, shown using the quadratic formula, with timing, compared to pre-compiled C code. (2) The \u201citerate until converged\u201d problem, shown using a one-dimensional minimizer (Newton\u2019s method) for an array of initial states; talk about epochs in ML.\r\n* **0:35\u20120:45 (10 min)** Lecture 3: JIT-compilation with Numba and JAX. Describe JIT-compilation as the solution to the intermediate array problem (1). 
First Numba then JAX on the quadratic formula. Show that Numba only accelerates if you write imperative code, unlike JAX, and show that JAX can\u2019t follow if-branches or loops of unknown length.\r\n* **0:45\u20120:55 (10 min)** Project 3: JIT-compilation of the Mandelbrot set. Walk through imperative Python, array-oriented NumPy, Numba, and JAX implementations with timings. Note that array-oriented programming is advantageous for GPU programming, even beyond Python.\r\n* **0:55\u20121:05 (10 min)** Lecture 4: Ragged and deeply nested arrays. Show examples of ragged, nested, missing, and heterogeneous data, and how it can still make sense to treat them as arrays. Conversion to and from \u201ctidy\u201d data (tabular with references) to compare and contrast.\r\n* **1:05\u20121:20 (15 min)** Lecture 5: Working with large datasets. Overview of tools for chunking, compression, and labeled arrays: Dask, Zarr, Blosc2, and xarray.\r\n* **1:20\u20121:30 (10 min)** Wrap-up and Q&A.", "recording_license": "", "do_not_record": false, "persons": [{"code": "3E3SVE", "name": "Iason Krommydas", "avatar": "https://pretalx.com/media/avatars/3E3SVE_gPPzXZE.webp", "biography": "I'm a PhD student in the Department of Physics and Astronomy at Rice University, conducting research in high-energy physics as a member of the CMS experiment at the Large Hadron Collider at CERN. My work focuses on studying Higgs boson decays into two photons, analyzing data collected by the CMS detector, and contributing to software development for large-scale scientific analyses. I'm passionate about scientific computing and open-source tools that enable reproducible and efficient research. I\u2019m maintainer of Awkward Array, an array library for nested, variable-sized data, using NumPy-like idioms, and an author and maintainer of Coffea, a toolkit designed to simplify data analysis in particle physics. 
With experience in the scientific Python ecosystem, I enjoy building tools that drive insight and accelerate scientific discovery.", "public_name": "Iason Krommydas", "guid": "840e481e-40dc-51ea-87c4-451e4dbb762a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/3E3SVE/"}], "links": [{"title": "GitHub Repository", "url": "https://github.com/ikrommyd/2026-04-15-pyconde-and-pydata-2026-tutorial-array-oriented-programming", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AGYLTV/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AGYLTV/", "attachments": []}, {"guid": "24b5b905-6587-5de9-87c9-1578b8f6fcc0", "code": "S9VSCV", "id": 87003, "logo": null, "date": "2026-04-15T16:15:00+02:00", "start": "16:15", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-87003-roll-for-architecture-dungeonpy-a-d-d-companion-as-server-thin-clients", "url": "https://pretalx.com/pyconde-pydata-2026/talk/S9VSCV/", "title": "Roll for Architecture: DungeonPy \u2013 A D&D Companion as Server + Thin Clients", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "### **DungeonPy** \u2013 an interactive Dungeon&Dragons app for remote campaigns\r\nAs a matter of fact, tabletop RPGs are secretly distributed systems: one canonical world state, many clients, lossy links (players), and strict access control (\u201cno peeking at the DM notes\u201d). This talk introduces **DungeonPy**, which evolves a Python D&D companion from two local apps \u2013 a Pygame battle map and a PySimpleGUI initiative/condition tracker \u2013 connected by lightweight TCP messages, into an authoritative server with multiple role-aware clients. 
The result is a fully real-time interactive setup, where the DM controls the full state and can reveal information selectively \u2013 under the hood it\u2019s all about client intents, server validation, state updates, event broadcasting and periodic snapshots. We will cover protocol design (deltas vs snapshots, ordering/idempotency), server-side view projections (DM omniscience vs per-player truth and fog-of-war), UI-safe concurrency, and testing your homemade message bus without summoning race conditions. Expect patterns you can reuse in any stateful client/server app \u2013 just with more goblins.", "description": "## **Roll for Architecture: DungeonPy \u2013 A D&D Companion as Server + Thin Clients**\r\nMany tiny, personal projects reach a point where \u201cit works on my machine\u201d is no longer the interesting part, and it becomes more about making it *scale in structure*: clean boundaries, explicit state, testable behaviour and room for new features. This session is a case study of that journey using a D&D assistant for remote playing written in Python, that turned into something completely off-scale.\r\nThe starting point is a few desktop clients:\r\n- a **Pygame** (battle) *map* (grid, tokens, map objects, movement), and\r\n- a **PySimpleGUI** *tracker* for initiative, HP and conditions, with a clear \u201cactive combatant\u201d concept,\r\n\r\ninitially synchronized with lightweight TCP messages.\r\nThis already exposes real engineering questions: avoiding GUI thread violations, preventing feedback loops, and deciding what the \u201csource of truth\u201d is when both ends can initiate updates.\r\nThe evolved version introduces an **authoritative server**. Players connect as clients and can interact in real time \u2013 moving tokens and manipulating shared objects \u2013 while the DM client keeps full visibility and control. 
Clients do not share state with each other: they submit *intents* (move here, end turn, toggle condition), the server validates, updates state, and broadcasts events plus periodic snapshots. The key architectural move is *role-scoped state*: the server owns the full truth and projects different \u201cviews\u201d to each client (DM omniscience vs per-player information), so fog-of-war and hidden details are enforced by design. In other words:\r\n- **DM client**: full map + all combatants + hidden details.\r\n- **Player client**: a filtered view (only the player\u2019s character sheet details, their token, and whatever the DM has revealed).\r\n\r\nThe authoritative server runs on a small VPS with a public endpoint. Clients connect over secure WebSockets (wss://) on port 443, so players can join from anywhere without port forwarding. TLS is terminated by a standard reverse proxy, and the server speaks a small JSON message protocol (snapshots + events) over WebSocket frames.\r\n\r\n### **Open source software used**\r\n- `Pygame` (map rendering + input)\r\n- `PySimpleGUI` (initiative/conditions UI)\r\n- `asyncio` (multi-connection handling)\r\n- `websocket` (client/server transport)\r\n- (non-python) `NGINX` (reverse proxy, TLS)\r\n\r\n### **Detailed talk outline**\r\n1. - **Intro: D&D, remote play, and why am I doing this?**\r\n2. - **Setting up the table**\r\n        - PyGame\r\n        - PySimpleGUI\r\n3. - **State model and serialisation**\r\n        - Turning GUI objects into explicit data (combatants, map, doors, initiative order).\r\n        - JSON snapshots and versioning.\r\n4. - **Protocol design: events vs snapshots**\r\n        - Event messages for responsiveness (\u201ctoken moved\u201d, \u201ccondition added\u201d).\r\n        - Snapshot sync for recovery and late joiners.\r\n        - Idempotency and ordering: simple sequence numbers, replay safety and conflict avoidance.\r\n5. 
- **Role-based filtering (the privacy boundary)**\r\n        - A single canonical server state.\r\n        -  Server-side \u201cview projection\u201d: DM view vs per-player view.\r\n        - Practical examples: hidden enemies, secret doors, private notes, fog-of-war style reveals.\r\n6. - **Concurrency and UI integration**\r\n        - Socket threads feeding GUI event loops safely (posting events into the GUI thread rather than touching widgets directly).\r\n        - Keeping the map smooth under network jitter: optimistic UI vs confirmed updates (and when not to).\r\n7. - **Testing strategy for a networked hobby project**\r\n        - Unit tests for pure state transitions (\u201capply damage\u201d, \u201cadvance turn\u201d, \u201cillegal move rejected\u201d).\r\n        - Protocol tests with simulated clients.\r\n        - Logging that helps during live play without drowning you in noise.\r\n8. - **Extensibility hooks**\r\n        - Adding new client types (spectator screen, mobile character sheet).\r\n        - Plug-in style rules (different systems, homebrew conditions).\r\n        - Future improvements: authentication for remote play, persistence, and reconnection.\r\n\r\n### **Takeaways**\r\nAttendees will leave with a practical blueprint for:\r\n- Designing a tiny, testable message protocol in Python.\r\n- Updating GUIs safely from background network threads.\r\n- Enforcing \u201cwho can see what\u201d without duplicating logic everywhere.\r\n- More importantly, they\u2019ll be given an example on how python can be used for highly non-standard tasks (like allowing remote role-playing gaming).\r\n\r\n### **Intended audience**\r\nBasic and intermediate Python developers comfortable with basic classes and modules, and curious about architecture and networking. 
This talk will be as much about *Python* as it will be about *nerd culture*: the goal is learning something while keeping a light heart.", "recording_license": "", "do_not_record": false, "persons": [{"code": "RCRAQE", "name": "Francesco Conte", "avatar": "https://pretalx.com/media/avatars/RCRAQE_z4PNypp.webp", "biography": "#### **Who am I?**\r\nI'm a scientific researcher with a PhD in astroparticle physics at the Heidelberg University with a strong will to get out of academia \u2013 but in my life I worked many jobs and each of them changed me somehow. I gave up trying to solve the puzzle.\r\n#### **What do I like?**\r\nI'm passionate about coding, gaming and cats \u2013 well, about many other things, actually. For a cat-loving nerd, I'm surprisingly at ease among people, and on a stage.\r\n#### **What makes me comfortable?**\r\nIf it's about working, kind of everything? Be it coding (mainly Python, Julia, C++, Fortran, IDL, SQL), developing hardware prototypes, scouting for a publishing house, or working in a bookshop. If it's about life \u2013 listening to people talking at ease, and entertaining them.\r\n#### **What makes me uncomfortable?**\r\nPeople saying that the AI will bring the world to an end. 
I'm positive that if it happens, it's more likely it'll be because of them rather than the AI.\r\n#### **Anything else?**\r\nI'm Italian, but it's not my fault.", "public_name": "Francesco Conte", "guid": "46f0314e-f92e-5dd7-a1be-2ce6cc3cc420", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/RCRAQE/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/S9VSCV/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/S9VSCV/", "attachments": [{"title": "Talk slides", "url": "/media/pyconde-pydata-2026/submissions/S9VSCV/resources/Dungeo_oNXG4jM.pdf", "type": "related"}]}, {"guid": "e5a1df09-db9e-5ad7-8c5b-645f5eaa55ad", "code": "BHJERV", "id": 85381, "logo": null, "date": "2026-04-15T16:55:00+02:00", "start": "16:55", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-85381-django-q2-async-tasks-made-simple", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BHJERV/", "title": "Django-Q2: Async Tasks Made Simple", "subtitle": "", "track": "PyCon: Django & Web", "type": "Talk", "language": "en", "abstract": "Managing asynchronous task queues in Django with tools like Celery can be overkill for many projects. Django-Q2 is a lightweight alternative that integrates natively with the Django admin. In this talk, you will learn how to streamline your background tasks and cron jobs, featuring a practical demo to get you started immediately.", "description": "Handling asynchronous tasks and cron jobs in Django is essential for features like sending emails or generating periodic reports. However, the industry standard Celery often comes with significant configuration overhead and infrastructure dependencies like Redis or RabbitMQ.\r\n\r\nIf you have ever struggled with that complexity or looked for a more intuitive way to manage background processes, Django-Q2 is the answer. It is a lightweight solution that leverages your existing database, eliminating the need for complex brokers. 
Its native integration makes it perfect for small to medium-sized projects that need to move fast.\r\n\r\nThis talk will guide you through integrating Django-Q2 to simplify your workflow:\r\n\r\n- Problem Solving: We will look at how to use Django-Q2 to solve real-world task management issues.\r\n\r\n- Feature Deep Dive: We will explore key features, such as using the database as a backend and monitoring tasks directly from the Django Admin interface.\r\n\r\n- Live Demo: We will configure Django-Q2 from scratch to handle asynchronous email sending and schedule a recurring maintenance job", "recording_license": "", "do_not_record": false, "persons": [{"code": "EDNBNV", "name": "Moin Uddin", "avatar": "https://pretalx.com/media/avatars/EDNBNV_CR45jCe.webp", "biography": "DevOps Engineer and Dynatrace Consultant at Par-Tec S.p.A. \r\nI am Passionate about technology, innovation, and continuous learning. I like to automate things and I Love Python and K8s. \r\nBeyond the technical world, I am an avid traveller and explorer, always seeking new perspectives and inspiration from around the globe.", "public_name": "Moin Uddin", "guid": "902bc81d-f1f7-5899-8438-8d57787cd2c7", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EDNBNV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BHJERV/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BHJERV/", "attachments": [{"title": "PyCon-DE-Talk-Slides-Moin-Uddin", "url": "/media/pyconde-pydata-2026/submissions/BHJERV/resources/PyCon-_iGM9ZzK.pdf", "type": "related"}]}, {"guid": "7a82240b-4bbf-5bc5-b008-54f0b04c669e", "code": "GPV9SM", "id": 87620, "logo": null, "date": "2026-04-15T17:35:00+02:00", "start": "17:35", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-87620-holistic-optimization-implementing-pipeline-as-a-trial-hpo-with-ray-and-cloud-infra", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GPV9SM/", "title": "Holistic 
Optimization: Implementing \"Pipeline-as-a-Trial\" HPO with Ray and Cloud Infra", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk", "language": "en", "abstract": "Most hyperparameter optimization (HPO) stops at the model boundary. But what happens when your system relies on a complex chain of steps, a short-horizon model, a long-horizon model, ensembles, postprocesses etc? Tuning one piece in isolation often leads to sub-optimal global results.\r\n\r\nIn this talk, we explore how we used Ray to move beyond simple model tuning. We\u2019ll dive into a \"Pipeline-as-a-Trial\" architecture where Ray acts as the brain, triggering independent, scalable cloud workflows (SageMaker Pipelines or Databricks Workflows) for every hyperparameter set.\r\n\r\n\r\nWe will discuss:\r\n* The architectural shift from tuning models to tuning pipelines\r\n* How to build the DAG/pipeline on SageMaker/Databricks using declarative configs\r\n* How to use Ray to orchestrate heavyweight remote jobs without bottlenecks.\r\n\r\nAttendees will learn how to optimize entire pipelines (in a scalable manner on cloud) to minimize global metrics like WAPE, rather than just local model loss.", "description": "Have you ever tuned a model to perfection, only to have it fail once integrated into your production pipeline? This is the \"local optimization\" trap: fixing a component while unintentionally breaking the complex system around it. \r\nAt Zalando, where we manage hundreds of forecasting models across 25 countries, local wins often lead to global failures. In this talk, we move beyond single-model tuning to explore Holistic Optimization. 
\r\nWe will detail how our team implemented a \"Pipeline-as-a-Trial\" architecture, \r\n\r\nWhat We\u2019ll Cover:\r\n* An explanation of what the \"local optimization\" problem is, and how it appears everywhere from tech products to day-to-day life.\r\n* How we leveraged Ray\u2019s distributed capabilities to manage high-concurrency Machine Learning workloads.\r\n* Infrastructure Comparison: A candid, battle-tested breakdown of running HPO across AWS SageMaker, Databricks, and Internal EC2/Metaflow clusters.\r\n* Operational Trade-offs: Real-world insights into the performance, cost, and traceability of different cloud implementations.\r\n* Configuration Driven Development: How an abstract library layer allows us to scale experimentation across hundreds of production models.\r\n\r\nStop chasing local solutions. Join me to learn how to build a distributed HPO framework that optimizes for your global business objectives.\r\n\r\nPS: if you are a \"Rick and Morty\" fan, definitely join to see how Rick fell into the local optimization problem!", "recording_license": "", "do_not_record": false, "persons": [{"code": "H39TUV", "name": "Abdullah Taha", "avatar": "https://pretalx.com/media/avatars/H39TUV_V2CB9wP.webp", "biography": "Data/MLOps Engineer at Zalando. During my career I always worked alongside data scientists to build robust ML pipelines. 
I am very enthusiastic about designing and implementing scalable and robust systems.", "public_name": "Abdullah Taha", "guid": "ee0b38f8-2eeb-57fb-8c29-577b5d2a8a38", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/H39TUV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GPV9SM/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GPV9SM/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/GPV9SM/resources/Presen_50opRpj.pdf", "type": "related"}]}], "Lounge [1st Floor]": [{"guid": "9475477c-1ead-5eba-a873-8cb403e89237", "code": "KBNKAP", "id": 95844, "logo": null, "date": "2026-04-15T10:00:00+02:00", "start": "10:00", "duration": "02:00", "room": "Lounge [1st Floor]", "slug": "pyconde-pydata-2026-95844-innovation-day-startup-lounge-no-video", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KBNKAP/", "title": "Innovation Day: Startup Lounge [no-video]", "subtitle": "", "track": null, "type": "Open Space", "language": "en", "abstract": "**Presented by hessian.AI**\r\n\r\nPyCon DE & PyData has always been about building bridges \u2014 between disciplines, between communities, between ideas and implementation. And today, the worlds of startups, open source, and AI are more connected than ever.\r\n\r\nThat's why this year we're opening a dedicated space for exactly these conversations: real discussions, no hype, no fluff \u2014 just founders, engineers, and curious minds talking honestly about what it takes to build.\r\n\r\nThe **Startup Lounge** is a new, **unrecorded format** \u2014 which means you can speak freely, think out loud, and have the kind of conversation that doesn't happen on stage.", "description": "## What to expect\r\n\r\nThis is not a talk. There are no slides.\r\n\r\nThe Startup Lounge is an interactive, participatory format built around open conversations and peer exchange. 
Whether you're actively building a company, thinking about it, or just want to understand how open source and AI actually connect to real business \u2014 this is your space.\r\n\r\nYou'll be in a room with 2,000+ Python developers, engineers, and data scientists. That's a rare concentration of technical depth and practical experience. Use it.\r\n\r\nThe session combines short impulse talks, guided table discussions, and open networking.\r\n\r\n## Program\r\n\r\n### 10:00 \u2013 10:15 \u2014 Welcome & Introduction\r\n\r\nAn introduction to the format and an overview of the session.\r\n\r\n### 10:15 \u2013 10:30 \u2014 Impulse Talks\r\n\r\nThree short talks to spark discussion, including topics such as:\r\n\r\n* Open source & business models  \r\n* Developer tools and coding with AI  \r\n* Additional community-driven topics\r\n\r\n### 10:30 \u2013 11:30 \u2014 Table Discussions\r\n\r\nThe core of the session \u2014 three parallel discussion tables:\r\n\r\n* **\"Hair on Fire\"** \u2014 urgent founder challenges (product, hiring, funding, tech)  \r\n* **Startup Curious** \u2014 for future founders and those exploring the startup world  \r\n* **Tech Stuff** \u2014 where Python meets product: scaling, architecture, AI/ML in production\r\n\r\nParticipants are free to move between tables and join different conversations.\r\n\r\n### 11:30 \u2013 12:00 \u2014 Open Networking\r\n\r\nUnstructured time to continue conversations, connect, and follow up.\r\n\r\n## Why join\r\n\r\nBecause the best conversations at a conference never happen on stage.\r\n\r\nThe Startup Lounge is intentionally small, intentionally unrecorded, and intentionally unscripted. You'll meet founders, engineers, and domain experts \u2014 and actually talk to them. 
Share a challenge, get direct input, form your own picture.\r\n\r\nIf you're building something, thinking about building something, or just want to understand how open source and AI are reshaping what's possible \u2014 come find out for yourself.", "recording_license": "", "do_not_record": false, "persons": [], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KBNKAP/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KBNKAP/", "attachments": []}, {"guid": "1ecdc530-c688-5c37-9626-556aec1c8cb1", "code": "7YA98N", "id": 95845, "logo": null, "date": "2026-04-15T14:20:00+02:00", "start": "14:20", "duration": "01:30", "room": "Lounge [1st Floor]", "slug": "pyconde-pydata-2026-95845-workshop-what-do-we-still-need-to-learn-no-video", "url": "https://pretalx.com/pyconde-pydata-2026/talk/7YA98N/", "title": "Workshop: What do we still need to learn? [no-video]", "subtitle": "", "track": null, "type": "Open Space", "language": "en", "abstract": "AI agents are reshaping how we work, how we code, how we search \u2014 and quietly, how we think about ourselves as practitioners. The ground is shifting. And nobody has all the answers yet. That is exactly why we are doing this. We invite you into an open, interactive workshop in the community space: no slides, no frontal talk, no recording. Just honest conversations about what skills still matter, what is becoming obsolete, and what we need to build to navigate what is coming.", "description": "These are troubling times. Interesting times. Exciting times. Times that challenge us to reflect on where we should be focusing our energy.\r\n\r\nIn this interactive workshop, we will start by collectively answering four thought-provoking questions on shared boards \u2014 every voice, every perspective, visible in the room. Then we vote with dots on the topics that matter most to us. 
The winning topics go straight into a fishbowl: short, focused five-minute discussions where a small circle talks while everyone else listens, reacts, and rotates in.\r\n\r\nThe insights and themes from this session will flow directly into the panel discussion on Thursday afternoon, so your voice carries further than this room.\r\nCome in, participate, find a seat, and let us figure this out together.\r\n\r\n_Open community space \u00b7 Non-recorded \u00b7 All experience levels welcome_", "recording_license": "", "do_not_record": false, "persons": [{"code": "S3DXCY", "name": "Paula Gonzalez Avalos", "avatar": "https://pretalx.com/media/avatars/WVNMPG_2d7VusK.webp", "biography": "Data Lover, Coach, Manager. \r\n\r\nPaula is a Scientist turned Data Scientist by years of integrating statistics, machine learning methods and data wrangling and visualization pipelines while trying to understand science. In a similar way, in a continuous effort to improve science communication, with a strong sense of design and enjoyment of public speaking, she has become an expert in data visualization, visual presentation and storytelling.    \r\n\r\nShe loves to teach and now she mostly manages teams.    \r\nShe also sometimes draws science comics: https://github.com/pga99/comics", "public_name": "Paula Gonzalez Avalos", "guid": "50a454c2-0f8e-5d15-9a7a-b91319a30558", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/S3DXCY/"}, {"code": "NEBKEY", "name": "Sebastian Neubauer", "avatar": "https://pretalx.com/media/avatars/NEBKEY_9ioAcAl.webp", "biography": "Data scientist forever; Worked everywhere in Blue Yonder, messed with data science, built platforms, now exploring GenAI & AI agents. Known to always ask the question nobody else dared.", "public_name": "Sebastian Neubauer", "guid": "461acdf1-0d58-52ef-a400-0a008b4a35eb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NEBKEY/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/7YA98N/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/7YA98N/", "attachments": []}]}}, {"index": 3, "date": "2026-04-16", "day_start": "2026-04-16T04:00:00+02:00", "day_end": "2026-04-17T03:59:00+02:00", "rooms": {"Merck Plenary (Spectrum) [1st Floor]": [{"guid": "dd96c9a5-0ea1-5b87-8d3d-672c5cb2d2a7", "code": "LSJ3CN", "id": 89469, "logo": null, "date": "2026-04-16T09:05:00+02:00", "start": "09:05", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-89469-a-view-of-sovereignty-from-the-cloud", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LSJ3CN/", "title": "A View of Sovereignty from The Cloud", "subtitle": "", "track": "Keynote", "type": "Keynote", "language": "en", "abstract": "While The Cloud is just someone elses computer, those computers come together from many places and many, many someone elses. The constituent parts to connect, power, house, and ultimately operate those computers are from many more places and someones still! 
Kristian Rother", "avatar": "https://pretalx.com/media/avatars/9EPNQG_IGafHW9.webp", "biography": "Kristian is a freelance Python trainer who wrote his first lines of Python in the year 11111001111. After a career writing software for life science research, he has been teaching Python, Data Analysis and Machine Learning throughout Europe since 2011. More recently, he has built data pipelines for the real estate and medical sector.\r\n\r\nKristian has translated 5 Python books and written 2 more himself, in addition to numerous teaching guides. Kristian has collected 364 stars on Advent of Code. His favorite Python module is 're'. Kristian believes everybody can learn programming.", "public_name": "Dr. Kristian Rother", "guid": "1096b371-55c7-509f-a52d-73e66c5db09b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/MGXCCP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/7YA98N/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/7YA98N/", "attachments": []}]}}, {"index": 3, "date": "2026-04-16", "day_start": "2026-04-16T04:00:00+02:00", "day_end": "2026-04-17T03:59:00+02:00", "rooms": {"Merck Plenary (Spectrum) [1st Floor]": [{"guid": "dd96c9a5-0ea1-5b87-8d3d-672c5cb2d2a7", "code": "LSJ3CN", "id": 89469, "logo": null, "date": "2026-04-16T09:05:00+02:00", "start": "09:05", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-89469-a-view-of-sovereignty-from-the-cloud", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LSJ3CN/", "title": "A View of Sovereignty from The Cloud", "subtitle": "", "track": "Keynote", "type": "Keynote", "language": "en", "abstract": "While The Cloud is just someone elses computer, those computers come together from many places and many, many someone elses. The constituent parts to connect, power, house, and ultimately operate those computers are from many more places and someones still! 
We explore what these infrastructure pieces of The Cloud are explicitly; and how the many definitions of digital sovereignty can be viewed from the viewpoint high up in The Cloud.", "description": "While The Cloud is just someone else's computer, those computers come together from many places and many, many someone elses. The constituent parts to connect, power, house, and ultimately operate those computers are from many more places and someones still! We explore what these infrastructure pieces of The Cloud are explicitly; and how the many definitions of digital sovereignty can be viewed from the viewpoint high up in The Cloud.", "recording_license": "", "do_not_record": false, "persons": [{"code": "QZ3QRG", "name": "Aaron Glenn", "avatar": "https://pretalx.com/media/avatars/QZ3QRG_bXYhftg.webp", "biography": "Aaron A. Glenn has spent decades designing the physical and logical infrastructure that makes \"The Cloud\" possible \u2014 from programmable packet forwarding to large-scale network architecture. 
In his keynote, he takes us above the abstraction layer to ask what digital sovereignty actually means when you understand what the cloud is made of: whose hardware, whose power, whose fiber, whose decisions.", "public_name": "Aaron Glenn", "guid": "e694e343-101a-5aa4-a185-081a56f87ab1", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/QZ3QRG/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/LSJ3CN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/LSJ3CN/", "attachments": []}, {"guid": "288d705f-ab59-54fc-a96e-4a83263c4b84", "code": "EPASS8", "id": 87184, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-87184-building-mcp-at-the-speed-of-hype-principles-that-outlast-the-trends", "url": "https://pretalx.com/pyconde-pydata-2026/talk/EPASS8/", "title": "Building MCP at the Speed of Hype: Principles That Outlast the Trends", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk", "language": "en", "abstract": "Every week, development in AI brings us another groundbreaking release, another model version, another must-have integration. In this rapidly shifting landscape, how does one build production systems that won\u2019t be obsolete by the time you deploy them?\r\n\r\nWe'll explain how trusting in proven engineering principles from software development and machine learning, like separation of concerns and evaluation practices, became our anchor in an ever-changing landscape of AI development. We share lessons learned from building two MCP applications using FastMCP and PydanticAI. Against these challenges, we found that fundamental engineering principles provided the foundation we needed. 
\r\n\r\nParticipants in the process of developing AI tools will leave with practical strategies for building AI-powered systems that are flexible enough to adapt, yet stable enough to trust.", "description": "Every week, AI brings us another groundbreaking release, another model version, another must-have integration. Among these developments, agentic systems have emerged as a key component. Introduced at the end of 2024, the Model Context Protocol (MCP) has become an important enabler of this change and has established itself as the standard for connecting AI agents with external data sources and tools. In this rapidly shifting landscape, how does one build production systems that won't be obsolete by the time you deploy them?\r\n\r\nThis talk shares practical lessons from building two real-world MCP applications with FastMCP and PydanticAI: JobmonitorMCP, which leverages the jobmonitor.de API to create intelligent regional labor market reports, and a tool for an international non-profit combining multiple agents into a powerful question and answer application. \r\n\r\nDuring development, we faced multiple challenges: MCP clients and models that interpret the same protocol differently, emerging features with limited documentation and trying to evaluate non-deterministic outputs. Stakeholders repeatedly asked \"Why does it behave differently today?\" and \"Are we using the newest model yet?\"\r\n\r\nWhat we learned: The antidote to AI hype isn't avoiding new technology, it's anchoring development in trusted engineering principles. Separation of concerns and focused components helped us design for the protocol rather than specific clients. Rigorous evaluation approaches combined LLM-as-Judge with manual review and user feedback. Transparent communication helped us manage expectations around AI capabilities without undermining confidence.\r\n\r\nThis session targets intermediate Python developers building or planning to build AI-powered applications. 
You'll leave with concrete strategies for building AI systems that adapt to new models while maintaining production stability, reflection questions for your own projects, and perhaps a little more confidence in your existing knowledge.", "recording_license": "", "do_not_record": false, "persons": [{"code": "YPZZ3S", "name": "Rahkakavee Baskaran", "avatar": "https://pretalx.com/media/avatars/YPZZ3S_wxVJWPL.webp", "biography": "Rahkakavee Baskaran is the Data Lead at &effect. As a developer, she works in the field of Natural Language Processing and Generative AI with experience in software and infrastructure development.  Her work focuses on leveraging data science and software development to create social impact, particularly in projects related to social sciences and the public sector.", "public_name": "Rahkakavee Baskaran", "guid": "eeeb7f19-7698-50b4-9a58-d2349a250dd5", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/YPZZ3S/"}, {"code": "UUB9SM", "name": "Friederike Bauer", "avatar": "https://pretalx.com/media/avatars/UUB9SM_YwLrIvU.webp", "biography": "Friederike Bauer works as a Data Scientist for &effect and develops software solutions as a Frontend-Developer. 
She combines data and software development to make a difference in social sciences and public organizations.", "public_name": "Friederike Bauer", "guid": "41965523-6d0f-56ff-b1f4-07afc5a5b014", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UUB9SM/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/EPASS8/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/EPASS8/", "attachments": [{"title": "slides", "url": "/media/pyconde-pydata-2026/submissions/EPASS8/resources/2026-0_GvuYoje.pdf", "type": "related"}]}, {"guid": "030fdadc-4e71-5012-9f01-66aaa035ed75", "code": "37AESH", "id": 85053, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-85053-in-praise-of-documentation-tools-tips-techniques-for-literate-programming-in-the-ai-age", "url": "https://pretalx.com/pyconde-pydata-2026/talk/37AESH/", "title": "In Praise of Documentation: Tools, Tips & Techniques for Literate Programming in the AI Age", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "This talk has one simple message: *please document your code*. If you attend my talk, you'll hear me explain why I praise documentation, and why you should too. \r\n\r\nWhile writing documentation is generally acknowledged to be a \"good thing\", most engineers do not document their work. I'll offer my opinionated lament on the life and death of literate programming. A lament is a poetic discourse, expressing sadness, or feeling sorry about something. I'll give some examples of the *bad things* that can happen when people don't write documentation.\r\n\r\nThen, after making you feel bad, I'll give examples of how you can *feel good*. 
I'll explain why writing documentation is a \"good\" edifying activity, which helps you to be a better person, and make a better world.\r\n\r\nI'll review types of open source documentation (Python and Unix), documentation frameworks (Di\u00e1taxis), and Python tools (Sphinx, Jupyter, Quarto) you can try out as soon as my talk is finished.\r\n\r\nThen, I'll get \"cool n' futuristic\" by talking about AI. I'll emphasise the importance of text to AI-assisted coding and agentic workflows for \"spec-driven development\" (e.g. Agent-OS with Claude Code), before tempering your excitement by giving you some old-fashioned advice on \"good\" writing style by George Orwell.\r\n\r\nIn summary, if you come to my talk, you might experience an unusual mixture of sadness combined with hope. To conclude, I'll tell you to \"please document your code\". You'll laugh, go to the next talk, and forget my advice.", "description": "# Introduction\r\n\r\n# In Praise of Documentation\r\n\r\n- The Promise of \"Literate Programming\"\r\n- A Lamentation on the Death of Literate Programming\r\n- Bad things that happen when you don't document\r\n\r\n# Why You Should Document\r\n\r\n- Code is Communication\r\n- Accessible, Maintainable, Sustainable Code\r\n- Version Control (e.g. 
GitHub)\r\n\r\n# Examples\r\n\r\n## Examples of Open Source Documentation:\r\n\r\n- Python `help`\r\n- Docstrings\r\n- Unix `man` pages\r\n- `README.md`\r\n- `Readthedocs.com`\r\n\r\n## Documentation Framework Example: \r\n\r\n- Di\u00e1taxis\r\n\r\n## Python Documentation Tool Examples: \r\n\r\n- Sphinx\r\n- `cookiecutter`\r\n\r\n## Scientific Publishing Tool Examples:\r\n\r\n- Jupyter\r\n- Quarto\r\n\r\n# The Bit About AI (yes, I know, and I'm sorry)\r\n\r\n- The importance of text for AI Code Assistant and Agentic Coding workflows\r\n\r\n## Documentation in \"Spec-Driven\" Development:\r\n\r\n- `AGENTS.md`\r\n- Agent-OS with Claude Code\r\n\r\n# Calls to Action: \r\n\r\n- Writing Tips by George Orwell\r\n- Please Document Your Code!", "recording_license": "", "do_not_record": false, "persons": [{"code": "UBWSCP", "name": "Stephen", "avatar": "https://pretalx.com/media/avatars/UBWSCP_eFYRzeC.webp", "biography": "Psychologist turned full-stack polyglot Data Scientist with an established career in data analytics, scientific psychology, and project leadership. 
Driven by values of care, compassion and privacy.", "public_name": "Stephen", "guid": "333b7b4f-91fc-5f9d-92af-867d7591ffbb", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UBWSCP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/37AESH/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/37AESH/", "attachments": []}, {"guid": "0e77ea93-a1a0-5fd0-b9e3-c13d506cd7f0", "code": "RUSUYF", "id": 87163, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-87163-7-anti-lessons-from-building-a-pydanticai-agent-mistakes-we-made-so-you-don-t-have-to", "url": "https://pretalx.com/pyconde-pydata-2026/talk/RUSUYF/", "title": "7 Anti-Lessons from Building a PydanticAI Agent: Mistakes We Made So You Don't Have To", "subtitle": "", "track": "General: Autonomous Systems & AI Agents", "type": "Talk (long)", "language": "en", "abstract": "Life sciences compliance isn't forgiving. When your software helps companies navigate FDA regulations, ISO 13485, and EU MDR, \"move fast and break things\" isn't an option. Audit trails matter. Documentation is mandatory. Getting it wrong means regulatory findings, delayed product launches, or worse \u2014 patient safety risks.\r\n\r\nDuring the development of our AI Assistant we made every mistake in the most unforgiving environment possible. After more than a year building with PydanticAI, pydantic-evals, and Claude \u2014 nearly 3,000 commits and 20+ contributors \u2014 here are 7 anti-lessons so you don't have to repeat them:\r\n\r\n1. **\"We need a multi-agent system\"** \u2014 We built one. Then deleted it.\r\n2. **\"Agents need sophisticated planning\"** \u2014 A todo list beat our workflow engine.\r\n3. **\"Give the agent lots of specific tools\"** \u2014 Two high-level tools replaced dozens.\r\n4. 
**\"Encode workflows in code\"** \u2014 Markdown files the agent reads at runtime won.\r\n5. **\"It works when I test it\"** \u2014 Simple tests \u2260 real user journeys. Realistic evals or you're blind.\r\n6. **\"Automate everything\"** \u2014 Human stays in the driver's seat, not the trunk.\r\n7. **\"Apply what made you successful before\"** \u2014 Your engineering instincts might hurt you here.\r\n\r\nReal code, real git commits, real mistakes from a domain where mistakes are expensive.\r\n\r\n**Come for the mistakes. Leave with shortcuts.**", "description": "## The Domain: Where Mistakes Are Expensive\r\n\r\n[Qualio](https://www.qualio.com/) builds quality management software for life sciences companies \u2014 the ones making medical devices, pharmaceuticals, and biotech products. Our customers navigate FDA 21 CFR Part 11, ISO 13485, EU MDR, and SOC 2. In this world, compliance isn't optional. Audit trails are mandatory. Documentation gaps mean warning letters, import bans, or product recalls.\r\n\r\nWhen we decided to build an AI agent to help users manage compliance gaps, create remediation plans, and handle documentation \u2014 we knew the stakes. An agent that hallucinates a regulatory requirement or skips an approval step isn't just annoying. It's a liability.\r\n\r\nSo we built carefully with PydanticAI and Claude. And we still made every mistake possible. Here are 7 anti-lessons from the trenches.\r\n\r\n---\r\n\r\n### Anti-Lesson 1: \"We need a multi-agent system\"\r\n\r\nIt seemed obvious: separate agents for documents, compliance, and events. Clean architecture. We built it, shipped it, and spent weeks debugging coordination failures and inconsistent responses. In a domain where consistency matters, multi-agent chaos was unacceptable. The fix? Delete it. One agent with dynamic capabilities. 
Simpler, faster, and \u2014 according to our evals \u2014 more accurate.\r\n\r\n### Anti-Lesson 2: \"Agents need sophisticated planning\"\r\n\r\nCompliance workflows are complex. Surely the agent needs workflow graphs, state machines, planning frameworks? We tried. The agent got confused, skipped steps, invented procedures. The fix? A todo list. Add a task, check it off, see what's next. In a regulated environment, simple and auditable beats clever and opaque.\r\n\r\n### Anti-Lesson 3: \"Give the agent lots of specific tools\"\r\n\r\nWe built dozens of tools using PydanticAI's tool registration: `create_document`, `update_control`, `get_gap_details`, `list_frameworks`, `submit_for_review`... The tool descriptions bloated the context. The agent picked wrong tools. The fix? Two high-level tools: `call API` (with OpenAPI specs for the details) and `read instruction` (load a markdown file). Fewer tools, better results, easier to audit.\r\n\r\n### Anti-Lesson 4: \"Encode workflows in code\"\r\n\r\nHow does the agent know how to remediate a compliance gap? How to create a controlled document? At first, it was buried in prompts and Python. The fix? Markdown files \u2014 like Claude's skills system. The agent reads them at runtime. Engineers can review them. Knowledge belongs in documents your compliance team can actually read.\r\n\r\n### Anti-Lesson 5: \"It works when I test it\"\r\n\r\nOur early tests passed. The agent handled every case we threw at it. Then real users arrived \u2014 and everything broke. The problem? Our test cases were simple, synthetic, and predictable. Real user journeys are messy, multi-step, and full of context we didn't anticipate. The fix? Realistic evaluation data. We capture actual user sessions, anonymize them, and run them through pydantic-evals with LLM-as-judge rubrics. Does the agent follow the procedure? Does it hallucinate requirements? Does it handle the weird tangents users take? 
A 95% pass threshold in CI means nothing if your test data doesn't reflect reality.\r\n\r\n### Anti-Lesson 6: \"Automate everything\"\r\n\r\nWe built a fully automated feedback loop: user feedback creates a Jira ticket, a dev triages it, a Claude instance picks it up, raises a PR, responds to review comments. The dream? The fix: keep the human in the driver's seat. PydanticAI's `DeferredToolRequests` pattern lets our agent propose actions and pause for approval \u2014 the same principle applies to our dev workflow. In compliance software, someone is always accountable. The automation handles grunt work. Humans make decisions. Assisted development, not autopilot.\r\n\r\n### Anti-Lesson 7: \"Apply what made you successful before\"\r\n\r\nThis is the meta anti-lesson. Good engineering habits \u2014 upfront design, comprehensive APIs, handling every edge case \u2014 _can_ slow you down with agents. The LLM will surprise you. Your assumptions will be wrong. The fix? Start scrappy, iterate fast, let evals tell you what's working. The hardest part isn't code. It's unlearning.\r\n\r\n---\r\n\r\n### Bonus: Scaling Agent Development with tmux\r\n\r\nHow to run multiple agent experiments in parallel. Low-tech, high-leverage.\r\n\r\n---\r\n\r\n## Who Should Attend\r\n\r\nDevelopers building AI agents, especially in domains where accuracy and auditability matter. 
Familiarity with PydanticAI is helpful but not required \u2014 you'll see enough code to get started.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GUUWH3", "name": "Joshua G\u00f6rner", "avatar": "https://pretalx.com/media/avatars/GUUWH3_e1l788f.webp", "biography": "Platform Engineer by Day \u2699\ufe0f  \r\nProduct Engineer by Night \ud83c\udf19  \r\nEx-Data Scientist \ud83d\udcca  \r\nOnline Tutor \ud83d\udcfa   \r\nHusband to a gorgeous Wife \ud83d\udc8d  \r\nFather of 100<sub>2</sub> kids \ud83d\udc23", "public_name": "Joshua G\u00f6rner", "guid": "e45bdbe6-9edc-56c4-b060-77e8f25b0f18", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GUUWH3/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/RUSUYF/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/RUSUYF/", "attachments": [{"title": "Slides - 7 Anti-Lessons building a PydanticAI Agent", "url": "/media/pyconde-pydata-2026/submissions/RUSUYF/resources/7-anti_jrK5izO.pdf", "type": "related"}]}, {"guid": "2a7676ff-d76b-5761-9769-2ce0d4714082", "code": "AKGUAC", "id": 95769, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "01:00", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95769-open-source-as-a-business-models-paths-and-practice", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AKGUAC/", "title": "Open Source as a Business \u2014 Models, Paths, and Practice", "subtitle": "", "track": null, "type": "Panel", "language": "en", "abstract": "Open source powers the world's digital infrastructure \u2014 from AI research to enterprise data pipelines. But how do you build a sustainable business on it? 
This panel brings together three figures from the heart of the global open source ecosystem: Yann Lechelle (Probabl), who made the deliberate switch from infrastructure CEO to open source; Sylvain Corlay (QuantStack), whose consulting business is built from and around the core of the Jupyter ecosystem and its maintainers; and Ines Montani (Explosion / spaCy), one of the most influential voices in NLP tooling. Three founders. Three paths. Real answers on what it takes to build \u2014 or switch to building \u2014 a business in open source.", "description": "The discussion centres on concrete experience \u2014 different starting points, different ecosystems, different business models \u2014 with the shared thread being open source as a deliberate professional and commercial choice.\r\n\r\n**Key Questions:**\r\n\r\n1. *Different entry points:* You each came to building a business on open source from a different direction. What drove that decision \u2014 and what did you not expect?\r\n\r\n2. *Where the business actually starts:* Open source is the foundation, not the product. How do you define what you sell, and to whom?\r\n\r\n3. *Community and commerce:* How do you maintain trust and credibility in an open source community while running a commercial operation around it?\r\n\r\n4. *Open source and AI:* The AI landscape is consolidating fast around closed systems. What does that mean for open source projects and the businesses built on them?\r\n\r\n5. *European perspective:* Is there something specifically European about the way you think about open source as a business \u2014 around sustainability, sovereignty, or independence?\r\n\r\n6. 
*Advice:* What would you tell someone who wants to build a business on open source \u2014 or switch to doing so \u2014 and has not yet started?", "recording_license": "", "do_not_record": false, "persons": [{"code": "TRD78B", "name": "Yann Lechelle", "avatar": "https://pretalx.com/media/avatars/GJSHXR_jsGJhAV.webp", "biography": "Yann Lechelle is a full-stack digital entrepreneur with 30 years of experience building technology that scales \u2014 from real-time trading platforms and edge AI to cloud infrastructure and open-source ML. He led Scaleway as CEO, tripling the team to 600 people and positioning it as a credible European alternative to the hyperscalers. He is now co-founder, Executive President and Chairman of Probabl, the company behind scikit-learn.\r\nAuthor of ouvertarisme.fr, he frames digital sovereignty as a strategic imperative: adopt open technology to reduce dependencies, measure resilience to govern it. His work spans the geopolitical and geoeconomic dimensions of the digital transition, with frameworks like the Indice de R\u00e9silience Num\u00e9rique (IRN) and EuroStack adopted at European level.\r\nCo-founding member of France Digitale and HUB France IA. Entrepreneur-in-Residence at INSEAD. MBA INSEAD 2001J.", "public_name": "Yann Lechelle", "guid": "4c580903-9806-5488-aef3-5aedf5eea3d1", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/TRD78B/"}, {"code": "FZKG9N", "name": "Ines Montani", "avatar": "https://pretalx.com/media/avatars/FZKG9N_7il65fA.webp", "biography": "Ines Montani is a developer specializing in tools for AI and NLP technology. 
She\u2019s the co-founder and CEO of [Explosion](https://explosion.ai) and a core developer of [spaCy](https://spacy.io), a popular open-source library for Natural Language Processing in Python, and [Prodigy](https://prodi.gy), a modern annotation tool for creating training data for machine learning models.", "public_name": "Ines Montani", "guid": "b60e58b3-bd41-534c-a286-22ae8481a00a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FZKG9N/"}, {"code": "FDGPYL", "name": "Sylvain Corlay", "avatar": "https://pretalx.com/media/avatars/YJNSWT_GiPJ9sp.webp", "biography": "Sylvain Corlay is the founder and CEO of QuantStack.\r\n\r\nAs an open-source developer, Sylvain is active in the scientific computing ecosystem, particularly within the Jupyter project, as well as the conda-forge and xtensor projects. In 2018, he and the other Jupyter leadership members were awarded the ACM Software System Award.\r\n\r\nBeyond QuantStack, Sylvain is involved in the community. He served as a board member of the NumFOCUS Foundation from 2018 to 2024, as the vice-chair of JupyterCon 2020, and General Chair of JupyterCon 2023 in Paris. He has coordinated the PyData Paris community since 2017, both as the organizer of the meetup group and as co-organizer of the annual conference in 2024 and 2025.", "public_name": "Sylvain Corlay", "guid": "766dbe51-f9ff-5b16-91f8-eac165988618", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FDGPYL/"}, {"code": "K9DZKF", "name": "Alexander CS Hendorf", "avatar": "https://pretalx.com/media/avatars/8F38DV_FIGQ7yh.webp", "biography": "Alexander C.S. Hendorf is an independent AI and open-source strategy advisor working with companies in regulated industries. With 20+ years of hands-on experience across 50+ technologies \u2014 from the Python ecosystem to vector databases \u2014 he bridges the gap between boardroom decisions and technical execution. 
Alexander is a Python Software Foundation Fellow, heads the Open Source Working Group of the KI Bundesverband, serves on the board of the Python Software Verband, and has delivered 100+ talks in 15+ countries.", "public_name": "Alexander CS Hendorf", "guid": "e61ae96e-6f0d-5312-867d-6bf04eefb64f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/K9DZKF/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AKGUAC/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AKGUAC/", "attachments": []}, {"guid": "4fb9df79-0163-519e-bd49-634f394d63a6", "code": "KFPNUA", "id": 95770, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "01:00", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95770-panel-what-do-we-still-need-to-learn", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KFPNUA/", "title": "Panel What Do We Still Need to Learn?", "subtitle": "", "track": null, "type": "Panel", "language": "en", "abstract": "AI is no longer just a technical tool. It is fundamentally rewriting how we approach every professional task and we are now seeing a shift that impacts every role in every industry. In this panel discussion we will discuss the question no one can answer too confidently: in a world where AI writes the code, drafts the report, and automates the pipeline, what exactly are we still supposed to be learning?", "description": "If AI can handle the code, the writing, the routine data processing, and complete automation frameworks with agents, what exactly are we supposed to be learning? This panel brings together the Python community for an honest and likely heated conversation about the skills that actually provide a human edge in an automated world. We are moving past the hype to look at the hard realities of 2026:\r\n\r\n**The Educational Pivot:** When the doing is automated, does technical mastery still matter? 
A developer and Data Science education expert debates whether we still need to learn the how, or whether the focus should shift entirely to the why.\r\n**The Global Reality**: A consultant's view on how AI is transforming non-technical industries. It is no longer just about code; AI is reshaping the very tasks that define professional roles across the board. And how do you spot real talent in a world of AI-assisted portfolios?\r\n**The Future Framework:** The Head of an AI Academy asks: how do we upskill an entire workforce when the tools are changing faster than any curriculum can be written? And which future skills matter most, beyond AI skills themselves?\r\n\r\nNo consensus guaranteed. These are the very questions we all need to answer, right now.", "recording_license": "", "do_not_record": false, "persons": [{"code": "S3DXCY", "name": "Paula Gonzalez Avalos", "avatar": "https://pretalx.com/media/avatars/WVNMPG_2d7VusK.webp", "biography": "Data Lover, Coach, Manager. \r\n\r\nPaula is a Scientist turned Data Scientist by years of integrating statistics, machine learning methods and data wrangling and visualization pipelines while trying to understand science. In a similar way, in a continuous effort to improve science communication, with a strong sense of design and enjoyment of public speaking, she has become an expert in data visualization, visual presentation and storytelling.    \r\n\r\nShe loves to teach and now mostly manages teams.    \r\nShe also sometimes draws science comics: https://github.com/pga99/comics", "public_name": "Paula Gonzalez Avalos", "guid": "50a454c2-0f8e-5d15-9a7a-b91319a30558", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/S3DXCY/"}, {"code": "DT3MJ3", "name": "Sebastian Unterreitmeier", "avatar": "https://pretalx.com/media/avatars/MWKHFS_3KQyTBT.webp", "biography": "With a background in business administration, Sebastian Unterreitmeier has spent more than 20 years advising on strategic people topics. 
Since 2016, the focus has been on future capabilities and skills in organizations, particularly in the context of strategic and technological change such as AI. Sebastian Unterreitmeier is part of Mercer\u2019s global Center of Excellence on AI and plays a leading role in advancing the practical application of AI in everyday work at Mercer Germany.", "public_name": "Sebastian Unterreitmeier", "guid": "a15a11dc-d4dc-560c-a32c-d816503e3ea0", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/DT3MJ3/"}, {"code": "N7PU9Q", "name": "Silvia H\u00e4nig", "avatar": "https://pretalx.com/media/avatars/JS3SJX_iUm56s2.webp", "biography": "Silvia H\u00e4nig is an entrepreneur, strategic communications advisor, and founder of iKOM, her own consultancy for strategic communication and people advisory. She works with leaders from tech & professional services  companies in the DACh region and internationally (i.e. NTT Data, Tesla, Microsoft, Hays, John Deere), helping them communicate effectively in complex environments like restructuring, transformation and change. Silvia brings extensive leadership experience and a strong track record of guiding decision-makers through change with clarity, credibility, and strategic focus. Her work has been recognized with multiple professional awards. Beyond consulting, she is also a lecturer, author, angel investor, and a strong advocate for leadership and communication that creates lasting impact.", "public_name": "Silvia H\u00e4nig", "guid": "7bf5506c-590a-58e1-8b88-28d20a036763", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/N7PU9Q/"}, {"code": "MGXCCP", "name": "Dr. Kristian Rother", "avatar": "https://pretalx.com/media/avatars/9EPNQG_IGafHW9.webp", "biography": "Kristian is a freelance Python trainer who wrote his first lines of Python in the year 11111001111. After a career writing software for life science research, he has been teaching Python, Data Analysis and Machine Learning throughout Europe since 2011. 
More recently, he has built data pipelines for the real estate and medical sector.\r\n\r\nKristian has translated 5 Python books and written 2 more himself, in addition to numerous teaching guides. Kristian has collected 364 stars on Advent of Code. His favorite Python module is 're'. Kristian believes everybody can learn programming.", "public_name": "Dr. Kristian Rother", "guid": "1096b371-55c7-509f-a52d-73e66c5db09b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/MGXCCP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KFPNUA/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KFPNUA/", "attachments": []}, {"guid": "8ce60352-2c8b-5db5-ad0d-a8735cc85d88", "code": "9ZEFTR", "id": 95771, "logo": null, "date": "2026-04-16T16:20:00+02:00", "start": "16:20", "duration": "00:20", "room": "Merck Plenary (Spectrum) [1st Floor]", "slug": "pyconde-pydata-2026-95771-closing-session", "url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZEFTR/", "title": "Closing Session", "subtitle": "", "track": null, "type": "Plenary", "language": "en", "abstract": "Closing Session", "description": "Closing Session", "recording_license": "", "do_not_record": false, "persons": [], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZEFTR/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/9ZEFTR/", "attachments": []}], "Titanium [2nd Floor]": [{"guid": "bf693119-5871-5374-b614-8833f0218c24", "code": "TZYGTL", "id": 86236, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86236-5-years-of-nicegui-what-we-learned-about-designing-pythonic-uis", "url": "https://pretalx.com/pyconde-pydata-2026/talk/TZYGTL/", "title": "5 Years of NiceGUI: What We Learned About Designing Pythonic UIs", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": 
"en", "abstract": "NiceGUI has grown from a small experiment into a widely used framework for building modern web-based user interfaces entirely in Python. After five years of development, thousands of users, and countless design iterations, we have gathered a rich set of insights into what makes a UI framework feel truly \u201cPythonic\u201d while still leveraging the power of the web platform.\r\nThis talk presents the key lessons learned while evolving NiceGUI, with a focus on how Python\u2019s own language features can meaningfully improve the developer experience. We explore how context managers, method chaining, decorators, async/await, type hints, dataclasses, and even well-chosen default arguments contribute to a clean, expressive, and maintainable UI API. Attendees will walk away with a deeper understanding of how to design Python-first interfaces\u2014whether for web apps, dashboards, or internal tools\u2014without needing to write JavaScript, CSS, or frontend boilerplate.", "description": "Five years ago, the NiceGUI project set out to answer a simple question: Can we build modern, interactive web UIs entirely in Python without giving up power or flexibility? Since then, the framework has evolved into a production-ready, community-driven tool that builds on top of proven technologies such as HTML, CSS, JavaScript, Vue.js, Quasar, Tailwind, and FastAPI\u2014while exposing a Pythonic interface that feels natural to Python developers.\r\nThis talk traces that journey and distills the design principles that worked, those that didn\u2019t, and the patterns that ultimately enabled NiceGUI to provide a smooth developer experience.\r\nWe begin with a short demonstration of NiceGUI\u2019s \u201c3-line Hello World,\u201d highlighting how familiar Python code can generate dynamic web interfaces. 
From there, we examine the technical foundations that allow the framework to stand on the shoulders of major frontend and backend ecosystems.\r\nThe core of the talk focuses on Python language features and how they shape API design:\r\n\r\n* **Context managers** to express hierarchy and UI composition intuitively.\r\n* **Method chaining** inspired by the builder pattern for concise, readable configuration.\r\n* **Decorators** (such as @page and @refreshable) to define routing and reactive behaviour without ceremony.\r\n* **Async/await** for event handlers, background tasks, and page functions.\r\n* **Type hints** to support static analysis, IDE completion, and clearer API intent.\r\n* **Dataclasses** as bindable, structured state containers.\r\n* **Default arguments and sentinel patterns** to allow powerful yet discoverable APIs.\r\n\r\nAttendees will gain practical insights useful beyond NiceGUI itself: how to design Python APIs for GUI frameworks, dashboards, developer tools, or any domain where clarity, maintainability, and expressiveness matter. The talk is aimed at Python developers interested in web interfaces, framework design, or improving the ergonomics of their own libraries.", "recording_license": "", "do_not_record": false, "persons": [{"code": "RPP37L", "name": "Falko Schindler", "avatar": "https://pretalx.com/media/avatars/RPP37L_C49egIH.webp", "biography": "Falko Schindler is a software engineer at Zauberzeug and a creator of the open-source web UI framework, NiceGUI. 
He specializes in building the company\u2019s core software stack for robotics and automation projects.", "public_name": "Falko Schindler", "guid": "1d5c83e5-54f4-5ac9-948d-0459ac8c0b95", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/RPP37L/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/TZYGTL/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/TZYGTL/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/TZYGTL/resources/slides_3fMM4s9.pdf", "type": "related"}]}, {"guid": "c5908d11-58d3-5e0f-93d5-ac2ca1a1f04c", "code": "GATMPP", "id": 86453, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86453-surviving-ai-fatigue-staying-sane-and-relevant-in-a-fast-moving-field", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GATMPP/", "title": "Surviving AI Fatigue: Staying Sane and Relevant in a Fast Moving Field", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "In an era where new AI models, benchmarks, and frameworks emerge daily, many of us feel caught in a relentless cycle of catching up, what is called \"AI fatigue\". This talk dives into the causes and consequences of that fatigue, from information overload and social media hype to the constant pressure to stay relevant. Drawing on personal experience and community insights, we explore why chasing every new paper or trend often leads to burnout rather than mastery.\r\n\r\nMore importantly, we share practical, evidence-backed strategies to stay informed without losing balance: curating a focused \u201cinformation diet,\u201d setting clear boundaries, using summarization tools intelligently, maintaining a personal knowledge base, and embracing \u201cJOMO\u201d\u2014the joy of missing out. 
We also discuss how organizations can combat fatigue structurally by promoting focus, curiosity, and psychological safety.\r\n\r\nThis session is for anyone, from beginners to seasoned professionals, seeking to rediscover genuine curiosity in AI while preserving mental well-being. Attendees will leave with concrete tools, actionable habits, and a renewed sense that it is not only acceptable but healthy to not know everything.", "description": "The world of AI and machine learning is moving at breakneck speed, with new papers, models, benchmarks, and frameworks announced daily. If you have ever felt overwhelmed, behind, or simply exhausted trying to keep up, you are not alone. In this talk, we share our own journey grappling with AI fatigue, what it feels like, why it happens, and what we have learned about staying informed without burning out.\r\n\r\nWe will start by defining AI fatigue and reflecting on why it is such a pervasive experience in our community, from social media hype to the sheer pace of real innovation. We highlight some of the common pitfalls, like chasing every trend, consuming too much noise, or neglecting mental health, and show why these approaches are counterproductive.\r\n\r\nThen, we focus on actionable strategies and habits that actually work. 
We share concrete tips and techniques we personally use to manage our learning and maintain our enthusiasm for the field, including:\r\n\r\n- Crafting an intentional information diet with trusted sources\r\n- Setting clear boundaries and time boxing your learning\r\n- Building a personal knowledge base for long term retention\r\n- Using summarization tools to cut through dense papers and blogs\r\n- Practicing \u201cJOMO,\u201d the joy of missing out, by focusing on depth over breadth\r\n- Learning in public by teaching, blogging, or pairing with others\r\n- Designing small, achievable experiments to stay engaged and motivated\r\n\r\nFinally, we will suggest how organizations and teams can help prevent fatigue at a structural level by fostering focus, psychological safety, and curiosity instead of always on urgency.\r\n\r\nThis talk is for anyone, from beginner to expert, who wants to stay relevant and curious about AI without losing sight of their well being. You will leave with a set of practical tools, a fresh perspective on learning in a chaotic environment, and hopefully the reassurance that it is okay to not know everything.", "recording_license": "", "do_not_record": false, "persons": [{"code": "TJMBFL", "name": "Ajay", "avatar": null, "biography": "Senior R&D Engineer at Ansys with a PhD in computational science from RWTH Aachen University. Work in the area of simulations, machine learning and AI safety.", "public_name": "Ajay", "guid": "8423451a-8de4-5a71-9674-46f131ff37e3", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/TJMBFL/"}, {"code": "NH38CT", "name": "Jeyashree Krishnan", "avatar": null, "biography": "Jeyashree Krishnan is a Senior Machine Learning Engineer at Siemens AG. Her work focuses on building and operationalizing scalable machine learning services, with an emphasis on foundation models and time series forecasting. 
She is also a Visiting Researcher at the Center for Computational Life Sciences, RWTH Aachen University.", "public_name": "Jeyashree Krishnan", "guid": "08c016d9-4a89-5259-a1d3-f83603e51b06", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NH38CT/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GATMPP/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GATMPP/", "attachments": []}, {"guid": "8f7dcaf6-3a7b-50e5-8c3a-3978ede5d759", "code": "TRGQTL", "id": 86652, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86652-open-table-formats-in-the-wild-reloaded-vortexing-ducks-over-floating-icebergs", "url": "https://pretalx.com/pyconde-pydata-2026/talk/TRGQTL/", "title": "Open Table Formats in the Wild\u2122 - Reloaded: Vortexing Ducks over Floating Icebergs", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk (long)", "language": "en", "abstract": "Open table formats have *almost* freed us from vendor lock-in. They form a critical building block of the modern, composable data stack. The most prominent open table format is Apache Iceberg - not only because of its storage layout, but also due to its REST catalog specification. Iceberg has gained significant traction through a recent stream of feature announcements from the community itself, major cloud providers like AWS, and data platform leaders such as Snowflake and Databricks.\r\n\r\nBut cutting through the hype: how does Iceberg actually perform in the real world if you are *not* Netflix or Apple which are capable of *Building Your Own Snowflake* (BYOS)? Can you realistically migrate from legacy solutions to Iceberg and enjoy all its promises without tradeoffs?\r\n\r\nThat, of course, is a rhetorical question. Some even argue that Iceberg got parts of the specification fundamentally wrong!?!\r\n\r\nCurious? 
Join me for another episode of Open Table Formats in the Wild\u2122. Expect a practical look at the current state of Apache Iceberg and Apache Parquet, alongside a gentle introduction to DuckLake and Vortex as promising contenders for table and file formats, respectively.", "description": "### Description\r\nThe core promise of open table formats is engine interoperability with ACID guarantees, mutability, and schema evolution for massive datasets stored on cheap, reliable cloud object storage. Modern data platforms demand far more than *just* interoperable, analytical batch processing. Engineers now require native support for CDC, incremental processing, streaming workloads, low-latency access, and point lookups - especially for AI-driven applications. Ideally, all of this would be covered by a single, unified solution.\r\n\r\nHowever, Parquet - the foundational format for physically storing much of today\u2019s data - predates both the AI boom and the era of unified batch and streaming systems. Likewise, Iceberg\u2019s original design DNA was firmly rooted in large-scale, batch-oriented analytical workloads. This raises an uncomfortable question: are Parquet and Iceberg truly up to the task?\r\n\r\nThis talk explores that question through real-world use cases and architectural constraints. While the focus is on conveying key ideas and practical insights, the session is aimed at an intermediate to advanced audience. 
If you are new to the topic, you may want to watch last year\u2019s [episode](https://youtu.be/YdFeHj5lRP4?si=NxO0Ot2-S_kYOokV) on Apache Parquet and Delta Lake, which provides a gentle introduction to the fundamentals of open table formats.\r\n\r\n### Takeaways\r\n\r\nAfter this talk, attendees will:\r\n- Understand why incremental processing is not a native concept in Apache Iceberg\r\n- Recognize how Iceberg\u2019s metadata model creates hard limits for low-latency streaming workloads\r\n- Learn why Parquet\u2019s physical layout becomes a bottleneck for point lookups and AI-driven access patterns\r\n- Get an early look at DuckLake and Vortex as emerging alternatives \r\n\r\n### Agenda\r\n\r\n**The Past (10 min)**\r\n- Rationale - **The Idealized Model**\r\n- Implications - **The Engineering Trade-offs**\r\n\r\n**The Present (15 min)**\r\n- Incremental Processing - **The Missing Primitive**\r\n- Streaming Workloads - **The Batch Inheritance**\r\n- AI Applications & Point Lookups - **The Access Wall**\r\n\r\n**The Future (15 min)**\r\n- DuckLake - **The Return of Relational Databases**\r\n- Vortex - **The Parquet of Tomorrow**", "recording_license": "", "do_not_record": false, "persons": [{"code": "3CXHP7", "name": "Franz W\u00f6llert", "avatar": "https://pretalx.com/media/avatars/3CXHP7_8G8qehl.webp", "biography": "Hi my name is Franz and I\u2019m an open source and python enthusiast:\r\n\r\n- father of 3 girls\r\n- major in psychology\r\n- chess hobbyist\r\n- former competitive ultimate frisbee player\r\n- likes cooking and baking sourdough bread", "public_name": "Franz W\u00f6llert", "guid": "c4193aee-6e76-5d3a-9f99-018a827796e1", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/3CXHP7/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/TRGQTL/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/TRGQTL/", "attachments": [{"title": "Slides", "url": 
"/media/pyconde-pydata-2026/submissions/TRGQTL/resources/Vortex_nbHaoFT.pdf", "type": "related"}]}, {"guid": "d482c3fc-aa91-571e-a6be-76caa131f101", "code": "GYBRVN", "id": 87023, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87023-making-tech-tutorials-accessible-practical-techniques-for-educators", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GYBRVN/", "title": "Making Tech Tutorials Accessible: Practical Techniques for Educators", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "Want to make your tech tutorials accessible but don't know where to start? This talk shares practical techniques anyone can use.\r\nIn June 2025, I started creating tutorials for deaf and hard-of-hearing learners because my partner is hard of hearing. I learned that accessible content helps everyone: international learners, people on noisy trains, junior developers and tired seniors at the end of the day.\r\nIn this talk, I will share practical techniques for creating accessible tech tutorials:\r\n\u2022\tCreating videos with meaningful subtitles (manual timing, simple language)\r\n\u2022\tPrinciples of simple language for technical content\r\n\u2022\tStructuring content so everyone can navigate it easily\r\nI am a content creator who learned these techniques through experimentation while teaching Excel. The talk presents my actual workflow with examples from creating tutorials for deaf/hard-of-hearing learners.\r\nWhether you're creating video tutorials, writing documentation, or teaching workshops, you'll leave with actionable steps to make your content more accessible.\r\n\r\nWhy it matters: Tech education is growing globally. 
Making our content accessible isn't just good ethics\u2014it makes our teaching better for everyone.", "description": "Accessible content isn't just for people with disabilities\u2014it makes tech education better by design for everyone. International learners, people on noisy trains, junior developers and tired seniors at the end of the day\u2014they all benefit from subtitles, simple language, and clear structure. Yet most developers who become educators have never learned how to make their content accessible.\r\nThis talk shares practical techniques I use creating tech tutorials for deaf and hard-of-hearing learners. Since June 2025, I've been creating Excel tutorial videos with manual subtitles in simple language for a YouTube community (~400 subscribers). My partner is hard of hearing, which taught me that accessibility isn't optional\u2014it's essential. The techniques could be applied to any tech content: Python tutorials, data science courses, documentation, or workshops.\r\nThe talk follows this structure:\r\n1. Understanding Barriers (5 minutes) Who benefits from accessible content? People with permanent, temporary, and situational limitations. \r\n2. Creating Accessible Videos (15 minutes) My core workflow: manual subtitles in DaVinci Resolve with timing based on text length, using AI tools to simplify technical language, visual clarity with arrows and highlights, and the insight that you can't be accessible to everyone\u2014focus on your target audience.\r\n3. Clear Structure for any Content (8 minutes) Applying video principles to any format: logical heading hierarchy for navigation, alternative text for images, using built-in accessibility checkers, and simple language techniques.\r\n4. 
Getting Started (2 minutes) One action to take this week, free tools to use, and resources for continued learning.\r\nI completed the W3C \"Introduction to Web Accessibility\" course and will be conducting a guest lecture on accessible learning materials at MSB Medical School Berlin (January 2026). As a non-native German speaker, I understand language barriers firsthand.\r\nAttendees will leave with practical techniques they can implement immediately and the confidence that accessibility is achievable without being an expert. No prior knowledge on accessibility is required.", "recording_license": "", "do_not_record": false, "persons": [{"code": "UVGDHP", "name": "Tamara Badikyan", "avatar": "https://pretalx.com/media/avatars/UVGDHP_YzxenLf.webp", "biography": "Tamara Badikyan is a Data Analyst currently working at the National Association of Statutory Health Insurance Physicians (KBV) in Berlin. Since June 2025, she has been creating accessible tech tutorials for deaf and hard-of-hearing learners. She runs a YouTube channel focused on making Excel content accessible through manual subtitles and simple language.\r\nTamara holds master's degrees in Migration and Intercultural Relations (Erasmus Mundus Program, University of Oldenburg) and Sociology and Social Anthropology (Central European University, Budapest). 
She completed the W3C \"Introduction to Web Accessibility\" course and will be conducting a guest lecture on accessible learning materials at MSB Medical School Berlin in January 2026.\r\nAs a non-native German speaker who is also learning German Sign Language, Tamara understands language barriers and accessibility challenges firsthand.", "public_name": "Tamara Badikyan", "guid": "2a160f35-1012-5f42-9234-2f4d13bfe4ee", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UVGDHP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GYBRVN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GYBRVN/", "attachments": [{"title": "PDF of the slides", "url": "/media/pyconde-pydata-2026/submissions/GYBRVN/resources/Tamara_9pwHimb.pdf", "type": "related"}]}, {"guid": "ada0ff8f-6e11-5176-b13b-289cc258d8a5", "code": "TB9WYZ", "id": 87250, "logo": null, "date": "2026-04-16T14:00:00+02:00", "start": "14:00", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-87250-how-to-compare-apples-with-oranges-proper-evaluation-of-article-level-demand-forecasts", "url": "https://pretalx.com/pyconde-pydata-2026/talk/TB9WYZ/", "title": "How to compare apples with oranges: Proper evaluation of article-level demand forecasts", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "How do you evaluate performance when you predict more than 10 million time series each day? While a good plot can be worth more than a thousand metrics for a single time series, with large-scale machine learning models implemented with *LightGBM* and *PyTorch* we have to resort to meaningful aggregations. 
We will share insights and learnings from the past 2 years of deploying and operating our article-level demand forecasting models at the pricing department of Zalando.\r\nThis talk moves beyond basic metrics to showcase the pitfalls of aggregated error measures and the best practices we\u2019ve developed to keep our stakeholders informed and our models accurate.", "description": "At the pricing department in Zalando, we are predicting future demand  for millions of articles on a daily basis by large-scale machine-learning models. These forecasts are key for discount decisions taken downstream. As evaluating every forecast on its own becomes infeasible at this scale and frequency we created a set of aggregated metrics that help us make informed statements about the performance of our models. On the one hand these metrics are being used by us to further improve our forecasting models, on the other hand they are used by our stakeholders to make informed decisions.\r\n \r\nTo handle this volume, we use *PySpark* for data processing and scaling our evaluations across the entire assortment. Furthermore, evaluating forecast performance in this context is crucial in two different scenarios, namely when analysing past forecast performance and when creating and comparing alternative models. In both cases we look at different time ranges and possible different subsets of the forecasted articles and calculate aggregated performance measures to compare them. 
We want to answer questions like\r\n\r\n\r\n - \u201cIs this forecast performing better in low-discount periods than during sales events?\u201d\r\n - \u201cDid we make a higher error on highly discounted articles during last week?\u201d\r\n - \u201cIs this model well-suited to predict high (or low) selling articles?\u201d\r\n - \u201cDid our model perform well for sneakers during the last voucher event?\u201d\r\n\r\nEvaluating aggregated metrics like a relative mean squared error (MSE) or a mean absolute percentage error (MAPE) over different sets of articles has lots of pitfalls. Comparing different parts of the assortments leads to an \"Apples vs. Oranges\" problem that we want to elaborate on based on examples we experienced in our daily work.\r\n\r\nTo answer the questions above we developed a set of aggregated metrics that we monitor on a daily basis using *plotly* and *streamlit* for clear, interactive visualization. We want to present these metrics and explain how they are useful for the questions and tasks mentioned above. We will highlight the techniques and best practices to draw meaningful insights from evaluating forecast performance and how we are able to compare apples with oranges using meaningful lower bounds for our aggregated metrics.\r\n\r\nWe also want to share how observations from our monitoring influenced the evolution of our *LightGBM* and *PyTorch* models and how it shaped important parts like feature engineering, hyperparameter tuning and the choice of our loss functions. Lastly we will touch on how to communicate these sometimes very technical numbers with stakeholders so that they can make informed decisions without being overwhelmed by details.", "recording_license": "", "do_not_record": false, "persons": [{"code": "BSV8LH", "name": "Stefan Birr", "avatar": "https://pretalx.com/media/avatars/BSV8LH_DHPe5tq.webp", "biography": "Senior Applied Scientist at **Zalando**, working on developing large scale forecasting systems. 
Stefan holds a PhD in Mathematics from **Ruhr University Bochum** where his research focused on \"Analyzing dynamic dependencies in time series\". Prior to his 3 years at Zalando he worked for 5 years at E.ON as a Data Scientist creating algorithms for smart meter analytics and forecasting.", "public_name": "Stefan Birr", "guid": "d0166556-92c9-5514-81da-2566ed934710", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BSV8LH/"}, {"code": "LHFSUL", "name": "Mones Raslan", "avatar": "https://pretalx.com/media/avatars/LHFSUL_C8cYDIg.webp", "biography": "Mones is a Senior Applied Scientist at Zalando, where he builds large-scale predictive models for pricing \u2014 which requires thinking about forecast evaluation from a business perspective. He holds a PhD from TU Berlin on foundational properties of neural networks.", "public_name": "Mones Raslan", "guid": "221a2c5d-b036-5bd2-bd45-50584a3cab34", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LHFSUL/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/TB9WYZ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/TB9WYZ/", "attachments": []}, {"guid": "62e20977-6c7b-5df7-8f58-c4378bc16247", "code": "WDHTQR", "id": 86959, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86959-simulating-the-world-using-simpy-a-practical-example", "url": "https://pretalx.com/pyconde-pydata-2026/talk/WDHTQR/", "title": "Simulating the World using SimPy: A practical Example", "subtitle": "", "track": "PyData: PyData & Scientific Libraries Stack", "type": "Talk", "language": "en", "abstract": "Modern systems are complex - and testing them in real environments is often expensive, risky, or simply not reproducible. 
Simulation is a practical way to explore behavior under controlled conditions: run scenarios, validate assumptions, inject failures on purpose, and repeat experiments without touching production.\r\n\r\nIn this talk, I build a concrete event-based simulation with `SimPy` to compare `load-balancing algorithms` under different conditions. I\u2019ll show how `SimPy`\u2019s processes and events fit together, how to structure the simulation cleanly, and how to move beyond a one-off demo by making runs reproducible and configurable - using `configuration files` and a simple `command-line interface`.", "description": "Real-world systems are often too complex to test reliably, in the same environment and under the same conditions. Changes are hard to measure, edge cases are difficult to reproduce, and external influences can hide the real behavior of a system. Simulation offers a way to abstract from reality while staying close enough to produce meaningful results. It allows full control over system components, timing, and disruptions, and makes it possible to test many scenarios in a repeatable way.\r\n\r\nThe practical example of this talk focuses on simulating `load-balancing algorithms`. Load-balancers are a good example of systems that are hard to evaluate in real environments. Some tested algorithms have no existing implementation, others differ across platforms, and cloud environments introduce many uncontrollable factors such as network latency, cloud noise, and reoccurring background workloads. These factors make fair and consistent testing almost impossible.\r\n\r\nThe problem is addressed by building an event-based simulation using `SimPy`. The session explains how `SimPy` works by using `Generators` to create the events, and how time and processes interact inside a simulation. 
An architecture for a practical example for a load-balancer simulation is presented, showing how different components interact and how algorithms can be swapped and compared.\r\n\r\nThe talk also covers improvements made to the simulation, including a `command-line interface` for easier execution and a `YAML` configuration file for flexible setup. It concludes with practical tips and lessons learned when working with `SimPy`, helping to avoid common pitfalls and improve simulation design.\r\n\r\nOverall, the session provides an introduction to simulation as a testing tool, a hands-on example using `SimPy`, and a realistic architecture for building and evolving simulations in Python.", "recording_license": "", "do_not_record": false, "persons": [{"code": "VRJLJE", "name": "Niklas", "avatar": "https://pretalx.com/media/avatars/VRJLJE_yWEldKT.webp", "biography": "Cloud-Engineer at inovex helping to develop and provide a private cloud infrastructure with a focus on performance and optimization.", "public_name": "Niklas", "guid": "aa10539c-7588-5377-92b4-eeb51937ab57", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VRJLJE/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/WDHTQR/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/WDHTQR/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/WDHTQR/resources/PyCon__bLKfLKf.pdf", "type": "related"}]}, {"guid": "2208a55f-ca50-5b56-a6e7-8a822b209e4c", "code": "MLUK9M", "id": 86839, "logo": null, "date": "2026-04-16T15:45:00+02:00", "start": "15:45", "duration": "00:30", "room": "Titanium [2nd Floor]", "slug": "pyconde-pydata-2026-86839-why-did-the-model-do-that-debugging-the-ghost-in-the-machine", "url": "https://pretalx.com/pyconde-pydata-2026/talk/MLUK9M/", "title": "Why Did The Model Do That? 
Debugging the Ghost in the Machine", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Talk", "language": "en", "abstract": "Why did the model say \"No\"? In an era where machine learning models increasingly influence high-stake decisions, \"trust me\" isn't a sufficient explanation. Yet, the logic behind many model decisions remains a black box, often hiding bias and making it difficult to establish trust.\r\n\r\nIn this talk, we move beyond the mystery of the \"ghost in the machine\" and into practical debugging using a structured *XAI Decision Tree*. Instead of guessing which method to use, we will walk through a logical framework that narrows down the field based on a few critical questions: the type of data you have, the level of model access available, and whether you need to explain a single prediction or the entire system.\r\n\r\nThe audience will leave with a clear path to choosing the right explainable AI (XAI) method - such as SHAP, LIME, or Integrated Gradients - and the corresponding Python framework for their specific use case.\r\n\r\nThis session will cover:\r\n\r\n- Importance of XAI: Understanding why XAI is crucial using a real-world example\r\n- XAI Landscape: An overview of existing XAI methods and how they are related\r\n- XAI Decision Tree: How to use the structured XAI decision tree to choose the right explanation method for your use case\r\n- Local vs global: A common understanding of local vs global explainability\r\n- XAI in Practice: XAI in practice as well as corresponding Python frameworks to use", "description": "My planned outline for the talk is as follows:\r\n\r\n- **Intro and opening hook** (4 mins): A look at a clearly biased model and why \"black box\" decisions fail to establish trust\r\n- **The XAI Decision Tree** (17 mins):\r\n\r\n    * A practical overview of the landscape and walking through the tree: Selecting the right method based on your model and data\r\n     * Mapping these methods to specific Python 
libraries and frameworks (e.g., `shap`, `lime`, `captum`, `transformers-interpret`, `alibi`, `dalex`, ...)\r\n\r\n- **Closing and Take-away** (4 mins)\r\n- **Q&A and Buffer** (5 mins)", "recording_license": "", "do_not_record": false, "persons": [{"code": "7GY7XH", "name": "Cosima Meyer", "avatar": "https://pretalx.com/media/avatars/7GY7XH_XhM8KZg.webp", "biography": "Cosima Meyer is a data scientist with a strong focus on making machine learning models explainable and accessible. Passionate about trustworthy AI, she is committed to building systems that are not only technically robust but also transparent and ethical. As a Google's Women Techmakers Ambassador and an active member of PyLadies, Cosima is dedicated to fostering inclusive and collaborative communities, working to bridge the two groups and create spaces for knowledge-sharing and growth.\r\n\r\nDuring her PhD studies at the University of Mannheim, Cosima discovered her enthusiasm for sharing knowledge through technical blog posts and developing open-source software. Her work reflects a blend of technical expertise and a passion for community building, inspiring others to explore, learn, and contribute to the fields of AI and data science.", "public_name": "Cosima Meyer", "guid": "5b698689-8687-50a1-9c8a-9ec5ba8810b7", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/7GY7XH/"}], "links": [{"title": "Why Did The Model Do That? 
Debugging the Ghost in the Machine (Presentation)", "url": "https://bit.ly/xai-talk", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/MLUK9M/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/MLUK9M/", "attachments": []}], "Helium [3rd Floor]": [{"guid": "ef1c6ce2-cde9-50f2-984e-56f7401d69e4", "code": "3U3BZH", "id": 87701, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87701-embedding-data-science-in-iot-devices-with-micropython-and-emlearn", "url": "https://pretalx.com/pyconde-pydata-2026/talk/3U3BZH/", "title": "Embedding Data Science in IoT devices with MicroPython and emlearn", "subtitle": "", "track": "PyCon: Embedded Systems & Robotics", "type": "Talk", "language": "en", "abstract": "Python is the standard solution for many machine learning and data science applications,\r\nfrom large cloud systems, to workstations, and even on larger embedded or robotics systems.\r\nBut as we move down into more constrained environments regular (C)Python starts to be a less good fit.\r\nThe MicroPython project provides a Python implementation that is tailored for such environments,\r\nand this makes it possible scale down to microcontrollers with just a few megabytes of RAM (or less!).\r\nAs a bonus, MicroPython with WebAssembly also makes lightweight browser applications possible.\r\nIn this talk, we will discuss how to combine Internet of Things (IoT) hardware, MicroPython and browser to build stand-alone smart sensor systems and laboratory gear for physical data science.", "description": "Typical Internet of Things devices send off most of the data to an external cloud service for analysis.\r\nThis causes challenges both in terms privacy, poor reliability under poor connectivity, and loss-of-availability when the service is discontinued.\r\n\r\nWe would like to show that it is possible to achieve the majority of 
functionality using a local-first approach, including machine-learning based sensor-data analysis.\r\nAnd that this can done on low-cost microcontrollers such as ESP32.\r\n\r\nThis talk will cover how to build stand-alone devices for measuring and analying physical sensor data, using MicroPython. This includes these aspects:\r\n\r\n- Measuring the surroundings using sensors\r\n- Connectivity using WiFi\r\n- Data storage using on-board filesystem\r\n- Serving a webui for configuration/control, using Microdot\r\n- Automated data processing/analysis using DSP and ML, with emlearn-micropython\r\n- Enabling interactive data analysis via webui\r\n- Managing concurrency on microcontroller, using asyncio\r\n- Optional integration. Pull using HTTP, and/or push using Webhooks/MQTT\r\n\r\nThe sensor data will either be accelerometer, sound or images/video (To be Decided).\r\n\r\n### About MicroPython\r\n\r\nMicroPython is an implementation of Python that runs on practically all microcontrollers with 128kB+ RAM. It provides access to the microcontroller hardware, functions for interacting with sensors and external pheripherals, as well as connectivity options such as WiFi, Ethernet, Bluetooth Low Energy, etc.\r\n\r\nWhile MicroPython can target a very wide range of hardware, we will focus on the Espressif ESP32 family of devices. These are very powerful and affordable, with good WiFi+BLE connectivity support, good open-source toolchains, are very popular both among hobbyist and companies, and have many good ready-to-use hardware development kits.\r\n\r\n### About emlearn-micropython\r\n\r\nemlearn-micropython is Machine Learning and Digital Signal Processing package for MicroPython, built on top of the emlearn C library. It provides convenient and efficient MicroPython modules, and enables application developers to run efficient Machine Learning models on microcontroller, without having to touch any C code. 
Compared to pure-Python approaches, the emlearn-micropython models are typically 10-100x faster and smaller.\r\n\r\n### Intended audience and expected background\r\n\r\nIntended audience: Any developer or data scientist curious about sensor data processing, IoT, and how Python scales down to the smallest of devices.\r\n\r\nThe audience is expected to have a basic literacy in Python and proficiency in programming. \r\nFamiliarity with microcontrollers and embedded systems is of course an advantage, but the talk should be approachable to those who are new to this area. Familiarity with basic networking and web/browser concepts is an advantage.", "recording_license": "", "do_not_record": false, "persons": [{"code": "CVFFNV", "name": "Jon Nordby", "avatar": "https://pretalx.com/media/avatars/CVFFNV_DB0OXgC.webp", "biography": "Jon is a Machine Learning Engineer specialized in IoT systems, with a Master in Data Science and a Bachelor in Electronics Engineering. He has been contributing to open-source software since 2010.\r\n\r\nThese days Jon is Head of Data Science at Soundsensing, a provider of monitoring solutions for HVAC systems in commercial buildings. 
He is also the maintainer of emlearn, an open-source Machine Learning library for microcontrollers.", "public_name": "Jon Nordby", "guid": "714a64bf-3ab3-5c35-808f-c554be1d2d97", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/CVFFNV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3U3BZH/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3U3BZH/", "attachments": []}, {"guid": "963cc534-6ced-51e8-b584-4af4dbd36fb6", "code": "RQTJFS", "id": 87729, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87729-how-we-built-an-inclusive-data-organization-careers-community-50-women", "url": "https://pretalx.com/pyconde-pydata-2026/talk/RQTJFS/", "title": "How We Built an Inclusive Data Organization: Careers, Community & 50% Women", "subtitle": "", "track": "General: Community & Diversity", "type": "Talk", "language": "en", "abstract": "Building inclusive data teams and sustainable career paths is a challenge many organizations struggle with\u2014especially in fast-growing, highly technical environments. Data careers are often portrayed as linear, while diversity initiatives remain abstract or ineffective in practice.\r\n\r\nThis talk shares concrete, experience-based lessons from building an inclusive data organization that supports career growth, fosters an internal data science community, and achieved more than 50% women representation in data roles. Rather than focusing on theory, the session highlights practical decisions, structural changes, and leadership behaviors that made inclusion measurable and sustainable.\r\n\r\nAttendees will gain actionable insights into designing career paths that support non-linear journeys, creating internal data communities that encourage learning and collaboration, and implementing diversity practices that strengthen\u2014rather than dilute\u2014technical excellence. 
The talk is relevant for data scientists, engineers, team leads, and managers who want to build better teams and healthier data cultures.", "description": "Many organizations aim to grow strong data teams, yet struggle with three connected challenges: unclear career paths, weak internal data communities, and a lack of diversity\u2014especially in senior and technical roles. These challenges are often treated separately, even though they strongly influence one another.\r\n\r\nThis talk presents a holistic approach to building an inclusive data organization by aligning career development, community building, and diversity goals. The focus is on practical actions and structural choices that can be applied in real-world settings, regardless of company size or industry.\r\n\r\nTalk Outline\r\n1. The problem: why data organizations struggle\r\n- Common myths about data careers (linear paths, constant availability, narrow profiles)\r\n- Why diversity efforts often fail in technical teams\r\n- The cost of ignoring community and inclusion: attrition, silos, burnout, and missed talent\r\n\r\n2. Career growth beyond linear paths\r\n- Designing career paths that support different life phases and backgrounds\r\n- Recognizing and valuing transferable skills in data roles\r\n- Making progression criteria transparent and fair\r\n- Supporting growth from individual contributor to leadership without forcing a single model\r\n\r\n3. Building an internal data science community\r\n- Why internal communities matter for learning, retention, and impact\r\n- Creating spaces for knowledge sharing without gatekeeping\r\n- Encouraging collaboration across roles (data science, engineering, analytics)\r\n- Aligning community activities with business value and technical standards\r\n\r\n4. 
Achieving diversity with intention\r\n- What \u201c50% women in data\u201d actually requires in practice\r\n- Hiring processes that reduce bias while maintaining technical excellence\r\n- Inclusive team structures and ways of working\r\n- Leadership behaviors that support inclusion without tokenism\r\n\r\n5. What worked\u2014and what didn\u2019t\r\n- Trade-offs, challenges, and lessons learned\r\n- Why inclusion is a continuous process, not a one-time initiative\r\n\r\n6. Actionable takeaways\r\n- Practical steps attendees can apply in their own teams\r\n- Signals to look for when inclusion efforts are working\u2014or failing\r\n- How to start small and scale impact over time", "recording_license": "", "do_not_record": false, "persons": [{"code": "UC7VCN", "name": "Xia He-Bleinagel", "avatar": "https://pretalx.com/media/avatars/UC7VCN_BkFYOle.webp", "biography": "Head of Data & Cloud, focused on inclusive career development, internal data science community, and creating diverse, high-performing data organization.", "public_name": "Xia He-Bleinagel", "guid": "188ad0d5-9370-5da4-99c9-986af1fd83af", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UC7VCN/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/RQTJFS/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/RQTJFS/", "attachments": []}, {"guid": "3e8b05bd-df6a-5fb9-925f-b0273f74332e", "code": "9PBYAP", "id": 87958, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87958-securing-ai-agentic-systems-enforcing-safety-constraints-in-ai-agent", "url": "https://pretalx.com/pyconde-pydata-2026/talk/9PBYAP/", "title": "Securing AI Agentic Systems: Enforcing Safety Constraints in AI Agent", "subtitle": "", "track": "PyCon: Security", "type": "Talk (long)", "language": "en", "abstract": "AI agents are increasingly deployed with autonomy: calling tools, accessing data, 
modifying systems, and making decisions without human supervision. While prompts and guardrails are often presented as safety solutions, they break down quickly in real-world agentic systems.\r\n\r\nIn this talk, we explore how to enforce safety constraints in AI agents beyond prompting, using engineering techniques familiar to Python developers and data engineers. We will examine common failure modes in agentic systems such as tool misuse, goal drift, and over-permissioning and show how to mitigate them using policy layers, capability boundaries, and execution-time validation.", "description": "AI agents are increasingly used as autonomous systems that can call tools, access data, and take actions in real environments. As these systems gain more autonomy, ensuring their safe and predictable behavior becomes an engineering challenge rather than a prompting problem.\r\n\r\nThis talk examines how safety constraints can be explicitly enforced in agentic AI systems, instead of relying solely on natural language instructions or model alignment. We will discuss typical safety and security issues that arise in agent based architectures, including over permissioned tools, unintended action chains, goal drift, and unsafe retries.\r\n\r\nUsing practical Python examples, the talk introduces architectural patterns for constraining agent behavior, such as policy layers, capability based tool access, action budgets, and runtime validation of agent decisions before execution. 
We will also explore how human in the loop checkpoints and audit logging can be integrated into agent workflows to support safer operation in production environments.\r\n\r\nThe focus of this session is on practical design and implementation techniques that help developers build AI agents with clearly defined boundaries, making their behavior more controllable, observable, and secure\r\n\r\nThrough practical Python examples, we will demonstrate how to:\r\n\r\n- Design constrained agent architectures\r\n- Enforce tool level permissions and action budgets\r\n- Validate and block unsafe agent actions at runtime\r\n- Combine human-in-the-loop checkpoints with automated controls", "recording_license": "", "do_not_record": false, "persons": [{"code": "3VHXXW", "name": "John Robert", "avatar": "https://pretalx.com/media/avatars/3VHXXW_K3RUGDC.webp", "biography": "John Robert leads data and cloud projects at Sunnic Lighthouse (Enerparc AG), where he works on building and operating data-intensive workflows in production. He has over eight years of experience with Python, machine learning, and AI, and began his career working on autonomous driving systems at Daimler (Mercedes-Benz).\r\n\r\nJohn has spoken at conferences across Europe, the United States, and other regions, sharing practical insights on building, deploying, and operating AI systems in real-world environments. His current focus is on AI safety and AI security, particularly how agentic and autonomous systems can be designed with clear boundaries and controls.\r\n\r\nHe is the founder of Don\u2019t Fear AI, an initiative aimed at helping people understand how to use AI responsibly and how to build reliable AI systems without hype or unnecessary complexity. 
John believes in a future where humans and AI systems work together safely and effectively.\r\n\r\nOutside of technology, John enjoys traveling and has visited nearly 50 countries.", "public_name": "John Robert", "guid": "bc2ec8f8-cb8a-5e0d-a30c-92a52ceb4617", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/3VHXXW/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/9PBYAP/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/9PBYAP/", "attachments": []}, {"guid": "367cc276-c332-51e5-a6e0-110f2362a689", "code": "MS7AWK", "id": 87223, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-87223-escape-the-hype-teaching-llm-concepts-through-an-interactive-ai-factory-game", "url": "https://pretalx.com/pyconde-pydata-2026/talk/MS7AWK/", "title": "Escape the Hype: Teaching LLM Concepts Through an Interactive AI Factory Game", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "Everyone talks about LLMs, RAG, and AI agents - but who truly understands them? Marketing promises magic while documentation assumes expertise. Recent research from Gartner reveals the consequences: only 8% of HR leaders believe their managers possess adequate AI competency, while companies that restructure work around AI achieve revenue goals twice as often as those who merely train employees. The problem isn't lack of information; it's the lack of genuine understanding through experience.\r\n\r\nWe took a different approach. Instead of slides or tutorials, we built \"AI Factory\" - a non-profit educational platform in the form of escape room game where players learn by doing. Craft prompts under budget pressure. Watch guardrails fail in real-time. Break their own RAG pipeline. Each mistake teaches more than any documentation ever could. 
\r\n\r\nIn this talk, we'll share what we discovered while building and testing this game with real users: why failure-driven learning outperforms tutorials, how game mechanics create memorable \"aha moments,\" and the surprising concepts that clicked only through play.", "description": "The gap between AI adoption and AI understanding keeps growing. Teams copy-paste prompts without understanding why they work, vendor materials highlight capabilities over limitations, and the EU AI Act now requires organizations to ensure \"a sufficient level of AI literacy among their staff.\" Traditional training \u2014 documentation, tutorials, talks \u2014 isn't closing this gap. What's missing is embodied learning: touching the parameters, breaking the system, feeling the consequences.\r\n\r\n**Our Approach**\r\n\r\nWe built \"AI Factory\" \u2014 a Python-based educational game where players learn LLM concepts through hands-on challenges. Set in a magical potion factory, players master prompt engineering, guardrails, RAG pipelines, MCP tool orchestration, and multi-agent coordination.\r\n\r\nWhat makes it different from typical AI tutorials:\r\n\r\n- Real API calls, not simulations. Players interact with actual LLMs \u2014 when they misconfigure guardrails or adjust temperature, they see real consequences that transfer directly to production.\r\n- Budget-driven decisions. Every API call costs in-game currency, forcing the same quality-cost-speed tradeoffs faced in real deployments.\r\n- Progressive disclosure over information dumps. Each game stage reveals one missing piece. The full picture only clicks at the end \u2014 and that revelation is the reward.\r\n- Immediate, specific, actionable feedback. 
Players see results the moment they submit \u2014 not just \"incorrect,\" but a diagnostic breakdown of exactly what went wrong, clear enough to act on and retry.\r\n\r\n**What This Talk Covers**\r\n\r\nWe share concrete design decisions and their outcomes \u2014 what worked, what didn't, and what surprised us:\r\n\r\n- Narrative vs. jargon. How story-driven framing changed the way players understood complex concepts like RAG \u2014 without a single slide of theory.\r\n- Constraints as a teaching tool. Why our first budget system backfired, and how a small redesign turned frustration into strategic thinking.\r\n- When to simulate instead of build. Where we replaced real infrastructure with controlled illusions \u2014 and why the learning outcome didn't suffer.\r\n- One game, many audiences. How players from different backgrounds found completely different entry points into the same levels.\r\n- Scoring on top of non-deterministic AI. How we built a reliable evaluation engine for a system that never gives the same answer twice.\r\n\r\n**Who Should Attend this Talk**\r\n\r\nThis talk is designed for multiple audiences:\r\n\r\n- Educators and trainers looking for new approaches to teaching AI concepts\r\n- Team leads responsible for upskilling teams on AI fundamentals \u2014 take away a tested approach, not just theory\r\n- Anyone interested in gamification as an approach to technical education", "recording_license": "", "do_not_record": false, "persons": [{"code": "VNDGHR", "name": "Vadim Vlasov", "avatar": "https://pretalx.com/media/avatars/VNDGHR_hlO8xKO.webp", "biography": "I'm a Data Scientist based in Munich who believes AI should be understood, not feared. After earning my Master's at LMU Munich, I've spent the past five years turning complex ML challenges\u2014from computer vision to agentic systems\u2014into working solutions. 
But what really excites me is making AI click for others: whether through hands-on workshops or building interactive experiences that turn abstract concepts into \"aha!\" moments. When I'm not wrangling models, you'll find me exploring ways to gamify learning and bridge the gap between cutting-edge AI and everyday understanding", "public_name": "Vadim Vlasov", "guid": "2a2b9938-a046-5be7-a788-93abab750f5a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/VNDGHR/"}, {"code": "FKLVKJ", "name": "Eric Glaser", "avatar": "https://pretalx.com/media/avatars/FKLVKJ_9Ignzv7.webp", "biography": "I am a data scientist at Steadforce, building LLM and agent workflows with Python from cloud to edge. My current focus is AI literacy: helping teams understand what LLMs, RAG, and agents actually do beyond the hype. I co-designed \u201cAI Factory,\u201d a game where players break and fix AI systems to build real intuition.", "public_name": "Eric Glaser", "guid": "6b34f22a-bacc-5a19-ba01-5496a3d74fea", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FKLVKJ/"}, {"code": "KN3MLY", "name": "Lisa Amrhein", "avatar": "https://pretalx.com/media/avatars/KN3MLY_QtXVXIP.webp", "biography": "I'm a Data Scientist who enjoys turning complex systems into practical, intuitive solutions. After earning my PhD in Mathematics I\u2019ve spent my career turning complex scientific ideas into practical computational tools. My work ranges from exploring the frontiers of GenAI to building semantic data layers, and I spend much of my time developing scientific software and digital twins for real\u2011world processes. 
I love creating tools that make sophisticated models understandable and usable, bridging the gap between deep technical detail and everyday application.", "public_name": "Lisa Amrhein", "guid": "8e2e4934-3c0f-5bdb-b275-29ad626e5db4", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/KN3MLY/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/MS7AWK/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/MS7AWK/", "attachments": [{"title": "Escape the Hype: Teaching LLM Concepts Through an Interactive AI Factory Game", "url": "/media/pyconde-pydata-2026/submissions/MS7AWK/resources/Escape_Of3Ay8m.pdf", "type": "related"}]}, {"guid": "09ad1395-848c-5c9a-acbb-7c6d7df49828", "code": "TST9LF", "id": 86841, "logo": null, "date": "2026-04-16T14:00:00+02:00", "start": "14:00", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-86841-dynamic-knowledge-graphs", "url": "https://pretalx.com/pyconde-pydata-2026/talk/TST9LF/", "title": "Dynamic Knowledge Graphs", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "Traditional RAG systems struggle to understand holistic connections in distributed, constantly changing knowledge sources that characterize real-world organizations. While document-based approaches using vector embeddings provide basic retrieval, they fail to capture relationships and answer complex questions about interconnected information. Graph-based RAG offers a solution, but existing implementations like Microsoft's GraphRAG explicitly avoid dynamic operations due to complexity, requiring costly rebuilds when knowledge changes.\r\n\r\nThis talk introduces a production-ready dynamic knowledge graph system that supports real-time insertion, querying, and deletion of information. 
Through practical implementation details you will learn to build maintainable knowledge graphs that evolve with data, handle ambiguous entities and preserve information lineage.", "description": "Like many organizations, we at VisualVest face the challenge of distributed and constantly evolving knowledge sources. Documentation lives across repositories, internal wikis, JIRA tickets, and various file formats in cloud storage. With ~250 employees making daily changes, our source of truth is highly dynamic. While traditional document-based RAG using semantic embeddings solved some of these pain points, it couldn't answer holistic questions or understand relationships between sources, leading us to explore graph-based approaches.\r\n\r\nThe challenge? Real-world knowledge sources are inherently dynamic. When thinking about information management and retrieval, we cannot ignore this reality if we want to create powerful, machine-readable and actually useful products. Microsoft's popular [GraphRAG](https://microsoft.github.io/graphrag/) library [explicitly rejected dynamic features](https://github.com/microsoft/graphrag/issues/429) (like deletion) due to complexity concerns. However, we believe that constantly rebuilding entire graphs isn't feasible for production systems.\r\n\r\nThis talk presents our solution: a truly dynamic knowledge graph with full insertion, query and deletion capabilities. We are also working on reducing the high computational cost of building knowledge graphs. 
Through caching strategies and small language model fine-tuning, we are trying to minimized both computational effort and strengthen our independence from cloud providers.\r\n\r\nWhat you'll learn:\r\n- An industry perspective on the challenges of distributed knowledge sources\r\n- Formal definition and properties of dynamic knowledge graphs\r\n- Our transformation pipeline\r\n  - Experiments with fine-tuned small-language models\r\n- Implementation details:\r\n  - Inserting nodes and edges while preventing ambiguity through similarity matching\r\n  - Tracking information origin across sources\r\n  - Safely deleting documents from the graph without breaking relationships\r\n  - Graph inference strategies\r\n\r\nBy the end of this talk, you'll understand why real-world knowledge graphs should be dynamic, how to build one yourself as well as the limitations and future directions of our approach.", "recording_license": "", "do_not_record": false, "persons": [{"code": "FYA7P8", "name": "Jakob Leander M\u00fcller", "avatar": "https://pretalx.com/media/avatars/FYA7P8_tndg8Q6.webp", "biography": "Hey, Im Jakob. I have studied Data Science in my Bachelors and Masters and currently work at a fin-tech where Im involved in all kinds of projects. My main goal is creating things that are actually useful and not just full of buzz-words. 
Im a big fan of visualizing things and always make sure that anyone who is interested in the topics Im working on can follow the reasoning of the chosen approach.", "public_name": "Jakob Leander M\u00fcller", "guid": "d4f636e5-faa2-5383-803a-9fb93f0e8947", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FYA7P8/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/TST9LF/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/TST9LF/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/TST9LF/resources/pycon__jVY0clZ.pdf", "type": "related"}]}, {"guid": "00457723-6901-52b8-b49d-001d8baecc01", "code": "BRCNB7", "id": 86784, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-86784-autism-and-the-predictive-brain-theory-in-tech", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BRCNB7/", "title": "(Autism and) The Predictive Brain Theory (in Tech)", "subtitle": "", "track": "General: Education, Career & Life", "type": "Talk", "language": "en", "abstract": "New studies showed how the brain is not a passive receiver of stimuli but an active predictor of stimuli. People with autism have more difficulties when the predicted and received stimuli doe not match. How do we create a tech workforce where autistic individuals can work more comfortable due to predictability?", "description": "What does new autism, AI and quantum computing research have in common? We need to stop following the well known 'input - process - output' model. Let's escape this model and embrace the predictive brain theory to understand autism and how people on the spectrum interact with technology and at the workplace.\r\n\r\nRecent brain research shows that the brain is not a passive receiver of stimuli, but an active predictor of what will happen next. 
The brain is constantly building a big model of the world based on past experiences and using this model to predict what will happen next. When we have a big model of the world we know what to expect. If something unexpected happens, the brain receives this as an error and needs to update its model to accommodate the new information. This is not autism or tech specific, this is how the brain works for everyone according to the latest brain research.\r\n\r\nA recent hypothesis suggests that people on the autism spectrum often have a harder time building the big model of the world and the stimuli-response system of people on the spectrum is often more sensitive, adding context blindness and the higher energy cost of executive functioning to the mix, it is harder for people on the spectrum to predict what will happen next and to deal with unexpected situations. This can lead to anxiety, stress and burnout.\r\n\r\nIf your tech job is constantly causing errors in the predictive model of the world, it will be hard to do your job and to be happy at work. In this talk I will explain how the predictive brain theory can help us understand autism and how we can build better technology and workplaces for people on the spectrum.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LUKY9J", "name": "Dennie Declercq", "avatar": "https://pretalx.com/media/avatars/LUKY9J_pH8R7j4.webp", "biography": "Dennie is Microsoft MVP in AI and Developer Technologies and has experience in accessibility with Microsoft technologies. In daily life Dennie is president and developer at DDSoft, a nonprofit that connects IT to People who are less tech-savvy. Dennie invented technical solutions and systems to help people with disabilities to participate in their daily life. 
Thanks to his autism he's the right man at the right spot to contribute as a volunteer in function of people with disabilities.", "public_name": "Dennie Declercq", "guid": "69493de4-7810-56e6-a527-0bab05fa4f13", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LUKY9J/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BRCNB7/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BRCNB7/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/BRCNB7/resources/Dennie_5K3oEWE.pdf", "type": "related"}]}, {"guid": "fa865128-eed8-5e75-a429-d2bdf874e35e", "code": "HQBC7R", "id": 85747, "logo": null, "date": "2026-04-16T15:45:00+02:00", "start": "15:45", "duration": "00:30", "room": "Helium [3rd Floor]", "slug": "pyconde-pydata-2026-85747-rediscovering-single-node-processing-when-does-it-make-sense-to-move-from-spark-to-polars", "url": "https://pretalx.com/pyconde-pydata-2026/talk/HQBC7R/", "title": "Rediscovering single-node processing: When does it make sense to move from Spark to Polars?", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "As data engineers, we are used to spinning up a Spark Cluster every time we want to do data processing and handle the overhead that comes with using such a mighty framework. But is this really necessary? In this talk I will argue that single-node processing with Polars is in many cases easier and cheaper. I will compare a typical ETL & Feature Engineering task in Spark and in Polars and offer a pragmatic opinion on when to use one or the other.", "description": "Apache Spark is the industry standard for big data processing, rightfully so. But for many data processing applications, a more light-weight solution will work just as well, avoiding Spark's compute and configuration overhead. 
Polars offers such a solution, with a fast single-node processing engine and a syntax that will pose no problems for experienced Spark developers.\r\nI will give a short comparison of Spark and Polars, where they have similarities and differences and show an implementation of a typical ETL and Feature Engineering task in both. I will compare the deployment, performance and cost of the two and, while giving my opinion on the topic, hope to enable you to also make an informed decision on when you want to use Polars and when to use Spark.", "recording_license": "", "do_not_record": false, "persons": [{"code": "TP9MH3", "name": "Jonas B\u00f6er", "avatar": "https://pretalx.com/media/avatars/TP9MH3_4qcDvtk.webp", "biography": "Data Engineer at inovex since 2022, full-time software engineer since 2018, coder for as long as I can remember. With my experience working on data warehouses and machine learning applications from small-scale tests up to international deployments, I enjoy eliminating bugs and bottlenecks, getting cool systems online and writing beautiful code. 
Still proud of the time when a colleague complained that deploying to production has become too boring and is no longer a thrilling adventure because of me.", "public_name": "Jonas B\u00f6er", "guid": "8f849840-6fe1-5e72-a5e1-81076fdee3b6", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/TP9MH3/"}], "links": [{"title": "Slides on GitHub", "url": "https://github.com/morgil/pycon26-talk-rediscovering-single-node-processing", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/HQBC7R/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/HQBC7R/", "attachments": [{"title": "Presentation Slides", "url": "/media/pyconde-pydata-2026/submissions/HQBC7R/resources/Redisc_UEvQbuz.pdf", "type": "related"}]}], "Platinum [2nd Floor]": [{"guid": "5c2ff1c8-0a3a-56c1-b75e-bfbe43a5a279", "code": "P7NYXB", "id": 87619, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-87619-tracking-knowledge-diversity-in-llm-generated-responses", "url": "https://pretalx.com/pyconde-pydata-2026/talk/P7NYXB/", "title": "Tracking Knowledge Diversity in LLM-Generated Responses.", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "As large language models (LLMs)-powered \u201cAI highlights\u201d become the first information people see on the Web, a key question arises: how much variety and perspective do these systems actually deliver for information-seeking queries? Do LLMs offer broader viewpoints than traditional search or Wikipedia pages? 
Do larger models really produce more diverse answers\u2014or are they all converging on the same language, and framing, raising concerns about \u201cknowledge collapse\u201d?\r\n\r\nDrawing insights from experiments across LLM families, real-world topics, and hundreds of user-style prompts, this talk introduces an open-source framework for benchmarking and tracking epistemic diversity in LLMs. We focus on practical lessons for data scientists building and evaluating LLM-powered search, summaries, and knowledge systems\u2014where diversity of information actually matters.", "description": "This talk summarizes our research on how LLMs generate narratives and recurring tropes in real-world information-seeking setups via prompting.\r\n\r\n**Talk outline:**\r\n* Knowledge collapse and epistemic diversity: What they mean and why they matter for real-world information access (5 mins).\r\n* Framework overview: How we measure epistemic diversity across LLM outputs (5 mins).\r\n* Experimental design, results: Curating dataset for comparisons across model families, search results, and Wikipedia pages (7 mins).\r\n* Implications for designing LLM-powered systems that preserve information diversity (10 mins)\r\n\r\n**Key takeaways for AI practitioners:**\r\n* When can retrieval-augmented generation (RAG) increase diversity?\r\n* Can expanding Wikipedia via translation improve epistemic diversity or reinforce existing tropes?\r\n* What are some open challenges in measuring cultural and contextual diversity in LLM outputs?\r\n* Where are we headed in terms of model sizes, fluency, and breadth of knowledge?\r\n\r\n**Useful links:**\r\n* [Our open source framework](https://github.com/dwright37/llm-knowledge)\r\n* [Reproducible Data Hugging Face](https://huggingface.co/datasets/dwright37/llm-knowledge-collapse)\r\n* [Our Research paper](https://arxiv.org/abs/2510.04226)", "recording_license": "", "do_not_record": false, "persons": [{"code": "EKABAA", "name": "Sarah Masud", 
"avatar": "https://pretalx.com/media/avatars/EKABAA_hA6qeuY.webp", "biography": "Sarah Masud is currently a postdoc at the University of Copenhagen, exploring stereotypes and narratives. During her PhD from Indraprastha Institute of Information Technology, New Delhi, she explored the role of different context cues in improving computational hate speech-related tasks", "public_name": "Sarah Masud", "guid": "40981cf1-e665-5d05-a704-bb5f23e185ba", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/EKABAA/"}], "links": [{"title": "Research Paper", "url": "https://arxiv.org/abs/2510.04226", "type": "related"}, {"title": "Github Repo", "url": "https://github.com/dwright37/llm-knowledge", "type": "related"}, {"title": "Full Dataset", "url": "https://huggingface.co/datasets/dwright37/llm-knowledge-collapse", "type": "related"}, {"title": "Slides for the Pycon DE talk on Speaker Deck", "url": "https://speakerdeck.com/_themessier/tracking-knowledge-diversity-in-llm-generated-responses", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/P7NYXB/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/P7NYXB/", "attachments": [{"title": "Slides for the Pycon DE talk on Pertalx", "url": "/media/pyconde-pydata-2026/submissions/P7NYXB/resources/LLM_Di_0iHvbmf.pdf", "type": "related"}]}, {"guid": "463f7fbc-dd38-5001-bb7e-5c388f440037", "code": "FP7YN7", "id": 85948, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-85948-are-we-free-threaded-ready-looking-at-where-free-threaded-python-fails", "url": "https://pretalx.com/pyconde-pydata-2026/talk/FP7YN7/", "title": "Are we free-threaded ready? 
Looking at where free-threaded Python fails", "subtitle": "", "track": "PyCon: Python Language & Ecosystem", "type": "Talk", "language": "en", "abstract": "Free-threaded Python aims to significantly improve performance, allowing multiple native threads to execute Python bytecode concurrently. In this talk, we will explore the current state of Python's free-threading initiative and assess its practical readiness for widespread adoption.", "description": "We begin by exploring the background of free-threaded Python, summarising its origins, current status, and the technical differences distinguishing it from standard Python implementations. A key focus will be examining the compatibility landscape, specifically investigating how many popular third-party libraries are currently prepared for free-threading. We will distinguish between generic pure Python wheels and explicitly free-threaded wheels and I\u2019ll explain how the community can contribute to compatibility verification. \r\n\r\nWe then critically discuss free-threaded Python's necessity, weighing the disadvantage of increased thread safety concerns (and verification methods) against the promised advantage of speed (including multithreaded profiling). Will free-threaded Python become a critical future direction for the language? How can you contribute? If and how specific projects can immediately benefit from it? Let\u2019s find out together!", "recording_license": "", "do_not_record": false, "persons": [{"code": "8EGVC9", "name": "Cheuk Ting Ho", "avatar": "https://pretalx.com/media/avatars/8EGVC9_LbezfQb.webp", "biography": "After having a career as a Data Scientist and Developer Advocate, Cheuk dedicated her work to the open-source community. Currently, she is working as a developer advocate for JetBrains. She has co-founded Humble Data, a beginner Python workshop that has been happening around the world. 
Cheuk also started and hosted a Python podcast, PyPodCats, which highlights the achievements of underrepresented members in the community. She has served the EuroPython Society board for two years and is now a fellow and director of the Python Software Foundation.", "public_name": "Cheuk Ting Ho", "guid": "716d26c2-170b-5a5e-86e5-9d4cecf3bbdd", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8EGVC9/"}], "links": [{"title": "Slides", "url": "https://canva.link/free-threaded-py", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/FP7YN7/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/FP7YN7/", "attachments": []}, {"guid": "22ea32aa-2fdc-5b05-a2fb-606e1d663ae7", "code": "VBPRQR", "id": 88347, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-88347-designing-and-scaling-a-python-library-in-the-open-architecture-automation-and-community", "url": "https://pretalx.com/pyconde-pydata-2026/talk/VBPRQR/", "title": "Designing and Scaling a Python Library in the Open: Architecture, Automation and Community", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk (long)", "language": "en", "abstract": "Designing a Python library that scales over time requires more than clean code. In this talk, we present ScanAPI, an open-source Python library for automated API integration testing and live documentation, as a case study in sustainable library design.\r\n\r\nWe explore how architectural decisions, Python features, and automation pipelines help reduce maintenance costs while improving developer experience. 
We also share how open collaboration and community practices turn a Python library into a long-term, scalable project.\r\n\r\nAttendees will leave with practical patterns to apply when building or evolving Python libraries in the open.", "description": "Building a Python library that remains reliable, maintainable, and welcoming to contributors is a challenge many projects face as they grow. This session presents ScanAPI as a real-world case study of how thoughtful engineering and automation can support both technical scalability and open source sustainability.\r\n\r\nScanAPI is an open-source Python library that enables automated API integration testing and live documentation using declarative specifications. Distributed via PyPI and actively maintained, the project has been adopted by developers across different contexts and was recognized by GitHub as part of initiatives focused on securing the open source supply chain.\r\n\r\nRather than focusing on abstract best practices, this talk dives into concrete engineering decisions made while designing and maintaining the library.\r\n\r\nWhat we will cover:\r\n\r\n1. Designing a Python Library for Growth\r\n- How the codebase is structured to separate configuration, execution, and reporting\r\n- Organizing modules and public APIs to remain stable over time\r\n- Packaging decisions and CLI design for ease of use\r\n\r\n2. Using Python Features Effectively\r\n- Configuration-driven workflows with YAML and JSON\r\n- Validation, error handling, and predictable failures\r\n- Type hints and interfaces to improve readability and contributor confidence\r\n\r\n3. Automation as a First-Class Concern\r\n- Continuous integration with GitHub Actions\r\n- Unit and integration testing strategies\r\n- Automated releases, versioning, and dependency management\r\n\r\n4. 
Developer Experience and Adoption\r\n- Documentation and live reports as part of the product, not an afterthought\r\n- Lowering the barrier for new users and contributors\r\n- Tooling choices that reduce cognitive load\r\n\r\n5. Community and Sustainability\r\n- Contribution guidelines and governance models\r\n- How open collaboration scales better than individual ownership\r\n- The role of the Cumbuca Dev open source community in sustaining the project\r\n\r\nBy the end of the talk, attendees will have a clear mental model for designing Python libraries that can scale technically and socially. The lessons shared are applicable to anyone maintaining or planning to publish Python libraries, whether in personal projects, companies, or community-driven initiatives.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GXBKTT", "name": "Camila Maia", "avatar": "https://pretalx.com/media/avatars/GXBKTT_X3YRK7p.webp", "biography": "Brazilian software engineer, open source maintainer, and co-founder of Cumbuca Dev, a community-driven initiative that supports underrepresented people entering and thriving in technology through real-world practice, open source collaboration, and education. With over a decade of professional experience, Camila focuses on backend engineering, developer experience, tooling and automation.\r\n\r\nShe is the creator and core maintainer of ScanAPI, a Python library for automated API integration testing and live documentation that has gathered widespread adoption and community contributions. ScanAPI has been recognized by GitHub as part of initiatives to strengthen the open source supply chain and is used by developers internationally. 
Camila\u2019s work spans not only code but also documentation, automation pipelines, and contributor experience practices that make open source projects more sustainable.\r\n\r\nCamila was the first Brazilian accepted into the GitHub Sponsors program, breaking new ground for maintainers in her country. She is also featured as one of ~50 global open source maintainers in the maintane.rs project, invited by the Open Source Initiative (OSI) to share her personal journey and perspectives on how open source can unlock opportunities in tech.\r\n\r\nHer engagement extends to speaking and mentoring at technical conferences around the world, including Pyjamas, EuroPython, Python Brasil, DjangoCon EU, and others, where she has presented both talks and hands-on workshops. \r\n\r\nThrough Cumbuca Dev, Camila advocates for practical learning and structured contributions as pathways to real experience, helping people from diverse backgrounds build skills, confidence, and visibility before their first job. 
She believes that open source is not just code \u2014 it is a vehicle for community, opportunity, and empowerment \u2014 and her work reflects a commitment to making technology spaces more accessible, collaborative, and humane.\r\n\r\nPeople > Tech \ud83d\udc9c", "public_name": "Camila Maia", "guid": "d72f6d10-da88-5169-9be6-9141a3a00fba", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GXBKTT/"}], "links": [{"title": "Canva link", "url": "https://canva.link/696vllybljnpl4c", "type": "related"}, {"title": "SpeakerDeck PDF file", "url": "https://speakerdeck.com/cumbucadev/2026-pycon-de-designing-and-scaling-a-python-library-in-the-open-architecture-automation-and-community", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/VBPRQR/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/VBPRQR/", "attachments": []}, {"guid": "34910107-df79-5fe1-b109-41309f9abe53", "code": "FJQXEQ", "id": 85467, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-85467-increase-productivity-of-cnc-machining-of-aerospace-engine-parts-with-python", "url": "https://pretalx.com/pyconde-pydata-2026/talk/FJQXEQ/", "title": "Increase productivity of CNC-machining of aerospace engine parts with Python", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Increasing unit labour costs and the imperative need to reduce energy consumption raises the necessity to enhance productivity in industrial production. 
Python is an excellent tool for GKN Aerospace, as the world\u2019s leading tier one aerospace supplier, to address the needs for higher utilization and unmanned operation on the shopfloor on its site in Kongsberg, Norway.\r\n\r\nAs an example, the presentation shares insight into the in-house developed \u201cProduction Execution System\u201d, consisting of a Python backend and a REACT frontend. The application orchestrates all necessary data on cell-level, like NC-programs and additional digital services of the company\u2019s IT environment during unmanned production. Furthermore, it supports the operator with necessary information to ensure highest quality of engine parts in a work environment of increasing digitalization and workload.", "description": "Python is not exclusively a powerful tool for datascience and web-development. It gains in importance on the shopfloor in industrial production, too. Increasing unit labour costs and the imperative need to reduce energy consumption raises the need to enhance productivity in industrial production in general.\r\n\r\nFor GKN Aerospace, as the world\u2019s leading tier one aerospace supplier of systems and components, this leads to higher utilization and unmanned operation of a high variety of production processes and CNC-machining tools. On its site in Kongsberg, Norway, GKN produces mainly turbine shafts and casings for civil and military engines, which are used for up to 100.000 flights every day around the globe.\r\n\r\nThe programming language Python enables fast and forward-thinking development of powerful applications which supports the operators and increases the degree of automation in the next years by concurrent assurance to fulfill demanding quality requirements and to cope the workload.\r\n\r\nStandardization of applications is a key factor for increasing robustness of the automated production and reduce maintenance effort. 
Therefore a standardized interface to NC-controller and PLC\u2019s from the \u201cpre ASCII era\u201d in the 70ies to up-to-date systems had to be found and suitable gateway services had to be developed.\r\n\r\nAs an example for the usage of Python on the GKN shopfloor in Norway, a standardized in-house developed \u201cProduction Execution System\u201d, consisting of a Python backend and a REACT frontend is presented. Connected to more than 25 different machining tools on the shopfloor and additional digital services of the company\u2019s IT environment, the application orchestrates all necessary data on cell-level. It provides data, like NC-programs and part meta-data to the machining tools and additional process information to the operators, necessary to produce a high variety of different engine components.\r\n\r\nIt enables unmanned production by selecting the next part for machining and commanding part changes to the machine to maximize utilization of the means of production. Furthermore, it reports back collected data for usage by other processes downstream in production.\r\n\r\nCloud-based and due to its restart capability in running production, bugfixes and implementation of new features can be carried out \u201con-the-fly\u201d. 
Thereby automated unit- and integration testing for the core functionalities ensure robustness for the high variety of used machining tools.\r\n\r\nThe development and usage of the \u201cProduction Execution System\u201d on GKN Aerospace\u2019s shopfloor are an excellent example for the increasing importance of the programming language Python to ensure highest quality of engine parts in a work environment of increasing digitalization and workload.", "recording_license": "", "do_not_record": false, "persons": [{"code": "APWPGK", "name": "Nico Buhl", "avatar": "https://pretalx.com/media/avatars/APWPGK_RLHVuTf.webp", "biography": "With a background as mechanical engineer and a PhD in material science, I describe myself as a mechanical engineer who can program and not as a software engineer. Started programming in Python, Perl, PHP and C++ as a pupil for fun in the end of the 90ths, I lost track of programming for some years during mechanical studies just to rediscover Python several years ago as a perfect tool for engineers to solve real-world problems in industrial production. The huge eco-system of Python and the intuitive syntax open new opportunities to me to combine domain knowledge in industrial processes with digitalization solution approaches. 
I enjoy sharing this with all people interested in improving industrial production in context of digitalization.", "public_name": "Nico Buhl", "guid": "9160750d-186f-5b70-8008-290aabbff2f4", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/APWPGK/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/FJQXEQ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/FJQXEQ/", "attachments": []}, {"guid": "650a6e62-fd80-5b3e-8c74-64b1ecc37968", "code": "GMNE3E", "id": 88409, "logo": null, "date": "2026-04-16T14:00:00+02:00", "start": "14:00", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-88409-making-bad-clis-fun-with-small-language-models", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GMNE3E/", "title": "Making bad CLIs fun with Small Language Models", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "Command Line Interfaces (CLIs) offer an efficient and powerful way to interact with software, but poorly designed interfaces can be incredibly frustrating. Complicated parameter names and unconventional formats can turn using a great tool into a burdensome experience.\r\n\r\nLarge Language Models (LLMs) seem like a great solution to this problem as they can easily add a natural-language interface to any CLI. However, LLMs can introduce their own challenges, such as requiring API keys or high-performance GPUs. In this talk, I'll demonstrate a method for creating natural-language interfaces for any CLI using fine-tuned Small Language Models. 
These models are lightweight enough to be run directly on laptops or even smartphones.\r\n\r\nWe'll explore the process of generating synthetic data, fine-tuning models, and evaluating their performance using both an in-house CLI and a well-known open-source package as examples.", "description": "I've often had to rely on a poorly designed home-grown CLI, leading to frustration due to constantly forgetting argument names and allowable values. While Large Language Models (LLMs) initially appeared to be an ideal fix, their limitations quickly became evident, suggesting the need for a more efficient approach.\r\n\r\nTo begin, we'll have a look at what makes CLIs hard to use and articulate why LLMs fall short in addressing them. Following this, we'll examine the process of generating synthetic data tailored for any CLI, whether it's proprietary or open-source. Then, I'll show you how to use this synthetic dataset to fine-tune a Small Language Model on your laptop or in the cloud. We will use the smallest variant of Google's Gemma 3 models, which boasts a lean 270 million parameters, to transform natural language instructions into actionable CLI commands.\r\n\r\nLastly, I'll share benchmark results to illustrate that these models can operate smoothly on various machines without needing API keys or GPUs, showcasing their robust capability and practical deployment potential.", "recording_license": "", "do_not_record": false, "persons": [{"code": "UUVSCB", "name": "Moritz Bauer", "avatar": "https://pretalx.com/media/avatars/UUVSCB_WjWlX2K.webp", "biography": "Moritz Bauer is a Senior Data Scientist at Blue Yonder, where he currently develops software for demand forecasting. In a previous career, he obtained a Ph.D.  
in high-energy particle physics and contributed research to the Belle II flavor physics experiment at KEK.\r\n\r\nWhile demand forecasting works very well without language models, he can't escape the fascination of modern AI and is always looking for excuses to spend some time in this domain.", "public_name": "Moritz Bauer", "guid": "c8085f5e-5a1a-549e-b410-3081630f5d3d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UUVSCB/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GMNE3E/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GMNE3E/", "attachments": [{"title": "Small Language Model Slides", "url": "/media/pyconde-pydata-2026/submissions/GMNE3E/resources/Moritz_KUdR2Oe.pdf", "type": "related"}]}, {"guid": "34973294-aa0f-5aa1-b7c0-d3feb9133f9f", "code": "MQJVFU", "id": 85801, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-85801-ai-evals-done-right-from-vibes-to-confident-decisions", "url": "https://pretalx.com/pyconde-pydata-2026/talk/MQJVFU/", "title": "AI Evals Done Right: From Vibes to Confident Decisions", "subtitle": "", "track": "PyData: Generative AI & Synthetic Data", "type": "Talk", "language": "en", "abstract": "Testing traditional software is \"simple\"... same input, same output. LLMs? Not so much. Same prompt, different result every time. So how do you actually know if your AI product is good?\r\n\r\nMost teams struggle with this. Generic metrics like \"Helpfulness: 4.2\" sound scientific but don't drive real decisions. And when a new model releases, it's weeks of debates instead of data.\r\n\r\nThis talk introduces Error Analysis: a methodology to discover the concrete failure modes of your AI product and turn them into measurable evals. You'll learn how to build a failure taxonomy that enables real prioritization. Which issues are critical? Which are frequent? 
What should developers fix next, and how do you measure success?\r\n\r\nThe payoff: A real quality number for stakeholders. Concrete improvement tasks for developers. And when a new model drops, a ship-or-skip decision within 24 hours based on actual data.\r\n\r\nExpect a meme-powered walkthrough, real-world examples from production, and a clear path to implement this yourself starting with just 20 traces.", "description": "Testing traditional software is \"simple\"... same input, same output. LLMs? Not so much. Same prompt, different result every time. So how do you actually know if your AI product is good?\r\n\r\nSpoiler: Most teams don't. They ship on vibes and hope for the best.\r\n\r\nThis talk takes you through our real journey at Blue Yonder, where we built an LLM-powered analytics system and needed a way to actually measure its quality. You'll see how we went from \"feels okay-ish\" to concrete numbers that let us make real decisions - with actual examples from production along the way.\r\n\r\nThe methodology is called Error Analysis: collect traces, annotate them from the user's perspective, group similar issues into failure modes, and turn those into automated evals. Along the way, we'll share practical best practices like why binary Pass/Fail beats rating scales, and why 100% pass rate means your evals are broken.\r\n\r\nThe payoff? When a new model drops, we run our pipeline and know within hours - not weeks - whether it's better or worse for our specific use case. Real percentages. Real trade-offs. 
Real decisions.\r\n\r\nExpect a meme-powered walkthrough and a clear path to implement this yourself starting with just 20 traces.\r\n\r\nOutline:\r\n- Introduction: The challenge of testing stochastic systems, why we needed a better approach\r\n- Collecting and Annotating Traces: Every trace is a user experiencing your product, Open Coding from the user perspective, real examples of failure modes we discovered\r\n- Building the Failure Taxonomy: Grouping observations into categories, Axial Coding, turning scattered comments into actionable failure modes\r\n- Writing Evals That Work: LLM-as-judge setup, binary scores vs rating scales, validating against human judgment\r\n- From Vibes to Decisions: Prioritizing what to fix, measuring improvement, 24-hour model benchmarking\r\n- Wrap-up: Your action plan, start with 20 traces", "recording_license": "", "do_not_record": false, "persons": [{"code": "9FLCR9", "name": "Martin Seeler", "avatar": "https://pretalx.com/media/avatars/9FLCR9_LJP70oq.webp", "biography": "Martin Seeler supercharges global supply chains with GenAI as Sr Staff AI Engineer at Blue Yonder. He ships AI that survives angry customers, skeptical executives, and Black Friday traffic. Speaks globally about the messy reality of production AI. 
Measures success in customer value delivered.", "public_name": "Martin Seeler", "guid": "379a916e-b3b2-5066-a04c-63729babd9a2", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/9FLCR9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/MQJVFU/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/MQJVFU/", "attachments": [{"title": "Presentation Slides", "url": "/media/pyconde-pydata-2026/submissions/MQJVFU/resources/AI_Eva_948O8gG.pdf", "type": "related"}]}, {"guid": "feb25566-699b-508a-b923-9589aed359bc", "code": "NAHX3L", "id": 86866, "logo": null, "date": "2026-04-16T15:45:00+02:00", "start": "15:45", "duration": "00:30", "room": "Platinum [2nd Floor]", "slug": "pyconde-pydata-2026-86866-restaurants-around-train-stations-are-bad-and-i-can-prove-it", "url": "https://pretalx.com/pyconde-pydata-2026/talk/NAHX3L/", "title": "Restaurants around train stations are bad and I can prove it", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "Have you ever asked yourself: Why is there no good food option close to this main station? This talk tries to find out if this is a systematic problem - using publicly available data and Google APIs.\r\n\r\nAfter this talk, you will know about the best- and worst-rated restaurants close to main stations in Germany, if kebabs or pizza places are systematically a better choice, and which station is the worst to eat in all of Germany.", "description": "Does the quality of restaurants degrade with your proximity to a train station? And which German town is worst for the hungry traveller? In this culinary data exploration, we used publicly accessible data to assess whether busy train stations correlate with lower restaurant ratings - and which towns are actually the worst. 
Using the Google Maps API and the hottest framework for data manipulation, polars, we give an overview over publicly available data resources and show how far you can get with them.\r\n\r\nOf course, this talk will also deliver all the cold hard food facts: Analyzing the data of over 10,000 restaurants in Germany and worldwide, we will present the best and worst dining options available at train stations. We compare urban and rural environments, examine the impact of chain stores, and provide practical advice for you, the hungry traveler.", "recording_license": "", "do_not_record": false, "persons": [{"code": "GKF8H3", "name": "Dennis Schulz", "avatar": "https://pretalx.com/media/avatars/GKF8H3_6iywM6W.webp", "biography": "Dennis Schulz is a Senior Consultant at TNG Technology Consulting. He holds a PhD in low temperature physics from the University of Heidelberg. Besides being a programmer, he organized and hosted the TV show Quasi Klar for RNF, published a book that was translated to Korean and Russian, and won Science Slam competitions all over Germany. 
As a part of the Innovation Hacking team at TNG, he worked on different AI showcases, fine-tuning embeddings, and data mining.", "public_name": "Dennis Schulz", "guid": "a72c126b-b04d-57c7-bdd0-b52f293769a0", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/GKF8H3/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/NAHX3L/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/NAHX3L/", "attachments": []}], "Europium [3rd Floor]": [{"guid": "8cf167f3-2152-5d48-b756-e45917f73bcd", "code": "EWZMJK", "id": 87716, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87716-don-t-call-your-llm-too-often-how-to-build-your-dialog-graph-with-confidence-and-sleep-at-night", "url": "https://pretalx.com/pyconde-pydata-2026/talk/EWZMJK/", "title": "Don\u2019t call your LLM too often! How to build your dialog graph with confidence and sleep at night.", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "Keywords: **Explainable AI, enhanced RAG, GraphRAG, LLMOps, dialog system evaluation.**\r\n\r\nDesigning reliable dialog flows for LLM-based systems remains challenging once conversations require branching, correction, or multi-step reasoning. Dialog graphs often evolve organically and accumulate structural issues: endless correction loops, dead subpaths, redundant validation steps, overly generic catch-all branches, or linear sequences that should be collapsed. Such phenomena raise operational costs, significantly increase TTFT and make the system answer less predictable and explainable.\r\n\r\nMany solutions try to introduce an all-fit generalized RAG retrieval solution. 
Contrary to this, we present our empirical learnings on how to enhance system speed, lower overall costs and offer a better dialog graph explainability through enhanced LLM call tracing and iterative enhancements for common dialog paths.\r\n\r\nWe also show that more elaborated knowledge retrieval strategies like GraphRAG may drastically enhance overall response quality and shorten the dialog graph. We evaluate several approaches and give recommendations on how to leverage more complex document indexing phases for inference time benefits.\r\n\r\nOverall, the session argues that scalable conversational systems require not only better prompts, but explicit graph structures paired with rigorous tracing and data-driven optimization.", "description": "Building reliable dialog flows for LLM-based conversational systems remains difficult once interactions move beyond linear question\u2013answer patterns. While early prototypes often rely on prompt chains, real-world systems quickly require branching, correction, clarification, and multi-step reasoning. At this stage, dialog logic implicitly turns into a graph, yet is still implemented and reasoned about as a sequence. This mismatch leads to structural problems that are hard to detect without explicit modeling and observability.\r\n\r\nComplex document retrieval systems are not born out of theoretical itch. We\u2019ll exemplify practical problems framing them around the following practical use case from the area of electricity/power production.\r\n\r\n*Use Case: Aladdin and the Case of the Almost-Exploding Power Plant*\r\n\r\nRick and Morty are operations engineers at a large electrical power plant. Every single day, they face the same heroic challenge: too many documents, too little clarity.\r\n\r\nThe technical staff produces a constant stream of operational reports: free-text summaries describing the health and performance of steam generators. These reports are rich in knowledge, but poor in structure. 
Rick\u2019s daily ritual is to read, compare, and summarize them, trying to predict which units will soon need maintenance. If he gets it right, the plant saves money by avoiding unnecessary service routines which are prescribed by regular maintenance guidelines. If he gets it wrong\u2026 well, let\u2019s just say steam generators have a dramatic way of expressing dissatisfaction.\r\n\r\nBut unstructured reports are only one part of the story. Alongside them exists a well-behaved, structured world: databases containing results of regular, non-invasive ultrasonic inspections of pipelines, used to track corrosion development over time. Morty has built a quantitative model that predicts the probability (and timing out of this probability) of a pipeline rupture based on these corrosion measurements.\r\n\r\nNaturally, Rick and Morty want everything. They want one system that can: 1) Understand messy human-written reports, 2) Reason over numerical corrosion models, and 3) Answer simple document questions without investing into unnecessary intelligence.\r\n\r\n\r\nThus, the system Aladdin is born.\r\n\r\nAladdin combines three very different subsystems:\r\n\r\n  - An agentic indexing component, which dynamically builds a search index for a GraphRAG over heterogeneous documents, given a pre-defined graph structure.\r\n  - An autonomous analytical agent, which evaluates pipeline failure probabilities using Morty\u2019s quantitative corrosion model.\r\n  - A lightweight text-based RAG, backed by a vector index, for fast and simple document retrieval.\r\n\r\nBut what is the challenge? Once these components start talking to each other, the dialog graph becomes unpredictable. Execution paths depend heavily on what information is actually present in the documents. And this is something that cannot be fully reasoned about in advance. 
Loops appear, branches explode, and theoretically \u201cclean\u201d dialog designs fail in practice.\r\n\r\nThis use case illustrates why observability, tracing, and empirical optimization of dialog graphs are essential when building real-world document retrieval systems for industrial environments. Especially when Rick just wants a straight answer and Morty really doesn\u2019t want another pipeline incident on his watch.\r\n\r\nGiven this use case we will exemplify several structural pathological cases in the dialog graph which we observed in practice and for which we found curative approaches.\r\n\r\n**Non-ending loops in the dialog graph**\r\nA frequent failure mode is the emergence of endless circular dialog graphs. Typical examples include:\r\n  - correction loops (\u201cPlease rephrase your input\u201d \u2192 user rephrases \u2192 validation fails again \u2192 same prompt),\r\n  - clarification cycles (\u201cWhat do you mean by X?\u201d \u2192 partial answer \u2192 same clarification),\r\n  - fallback loops where a generic catch-all path routes the conversation back to an earlier state without introducing new information.\r\n\r\nSuch cycles are rarely intentional; they arise from local fixes applied over time and are difficult to identify by prompt inspection alone. In production, they manifest as stalled conversations, increased latency, rising token costs, and user frustration.\r\n\r\nBeyond circularity, several other structural pathologies commonly appear in document retrieval systems.\r\n\r\n**Dead subpaths after non-matching branching conditions**\r\n\r\nDialog graphs often include branches guarded by semantic or data-dependent conditions, but changes in document structure, embeddings, or preprocessing can make these conditions unsatisfiable, creating dead subpaths that are never executed. 
These paths are dangerous because they give a false sense of coverage, increase maintenance and reasoning complexity, and in production often manifest as mysterious fallback behavior where the system always takes a default route instead of a specialized one.\r\n\r\n**Redundant validation and re-validation steps**\r\n\r\nAnother common issue is redundant validation, where the same or equivalent checks are performed multiple times along a single dialog path. This often happens when validation logic is added defensively at multiple layers: once at input parsing, again before retrieval, and again before response generation. While each validation step may seem harmless in isolation, their combination leads to inflated dialog depth, unnecessary latency, and increased cognitive load when analyzing traces. Worse, slight inconsistencies between validation prompts can produce contradictory outcomes, for example, an input being accepted in one step and rejected in the next.\r\n\r\n**Overly generic catch-all branches**\r\n\r\nCatch-all branches are often introduced as a safety mechanism: a \u201cdefault\u201d path that handles unexpected input or retrieval failure. Over time, however, these branches tend to grow in scope and responsibility, eventually becoming overly generic handlers that do everything. Such branches blur the distinction between genuinely exceptional situations and routine cases. As more logic is added to the catch-all path, it becomes harder to reason about what the system is actually responding to. Specialized logic may be silently bypassed, while unrelated scenarios are forced through the same generic response strategy.\r\n\r\n**Linear sequences that should be collapsed**\r\n\r\nMany dialog graphs contain long linear chains of nodes with no branching, no state changes, and no observable side effects between steps. 
These sequences often originate from iterative prompt development, where small transformations are added one by one (\u201cextract entities\u201d \u2192 \u201cnormalize entities\u201d \u2192 \u201crephrase query\u201d \u2192 \u201ccheck relevance\u201d). While conceptually clean, such linear chains are rarely optimal. They increase token usage, latency, and the number of failure points, without adding expressive power. More importantly, they obscure the true logical structure of the system: what could be a single semantic transformation is spread across multiple opaque steps.\r\n\r\nAn additional aspect of an overcomplicated dialog graph - especially one backed by an autonomous agent - is barely predictable costs. Autonomous parts of the system need a very tight observability net to stay under control and not to burst cost prediction by an order of magnitude.\r\n\r\nWorking within a specifically regulated environment of a power plant poses additional restrictions on the explainability of the results. Every fact must be trackable to the source of the information and model hallucinations must be recognized at a very early stage. \r\n\r\nAll the above requirements result in a setup which is heavily based on an LLM Operating Platform like Langfuse. \r\n\r\nWhen combined with dialog-oriented orchestration frameworks such as Langflow, experiment tracking extends from single calls to full conversational trajectories. Complete dialog traces expose path stability, node utilization, dead branches, fallback prevalence, and user-facing metrics such as turns to resolution or correction-loop repetition.\r\n\r\nOver time, this empirical evidence replaces design-time assumptions. Dialog paths are merged or removed based on observed execution rather than theoretical intent, with unreachable branches, redundant validations, and unstable loops revealed directly through trace analysis. 
Dialog graph optimization thus becomes a continuous, reproducible process grounded in measured behavior.\r\n\r\nThis talk proposes an engineering-oriented approach that models conversational logic as explicit dialog graphs and treats execution traces as first-class data. Using Langfuse instrumentation, developers can analyze concrete execution paths\u2014branch frequency, loop formation, latency hotspots\u2014and compare alternative graph designs through aggregated metrics and A/B testing, enabling systematic optimization based on evidence rather than intuition.\r\n\r\n**To sum up:** using concrete production-oriented examples, the talk shows how graph-based dialog design improves multi-step retrieval, explainability, and robustness across languages. Endless correction loops are detected and eliminated, dead branches are pruned, and overly generic catch-all paths are replaced with targeted recovery strategies. The overall message is that scalable conversational systems require not just better prompts or larger models, but explicit dialog graphs combined with rigorous tracing and data-driven optimization.", "recording_license": "", "do_not_record": false, "persons": [{"code": "DNKUXX", "name": "Evgeniya Ovchinnikova", "avatar": "https://pretalx.com/media/avatars/DNKUXX_csY7u9k.webp", "biography": "About\r\nI build solutions that make technology work for people. With experience in AI, data, and automation, I turn real needs into tools that make work faster and smarter.\r\n\r\nTrained as a physicist, I moved into data science and innovation to make a more direct impact on real-world problems. Since then, I\u2019ve worked across telecommunications, energy, e-commerce, and insurance\u2014helping teams create technology that delivers real value.\r\n\r\nOne highlight was helping build a GenAI platform used by more than 48,000 people, saving over 2 million working hours every year. 
I also contributed to an intelligent system that helps over 20,000 employees share knowledge more easily and work together more effectively.\r\n\r\nI enjoy learning, improving, and working with others who want to make a difference. Let\u2019s connect and explore new ideas together.", "public_name": "Evgeniya Ovchinnikova", "guid": "38ef1177-b730-599e-9aa1-569ffa27ce95", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/DNKUXX/"}, {"code": "NYYQSC", "name": "Andrei Beliankou", "avatar": "https://pretalx.com/media/avatars/NYYQSC_X5ZCstA.webp", "biography": "I am the Technical Lead for Data & AI in the Energy Retail Team at E.ON Digital Technology. Unofficially, I describe myself as a Software Engineer with a Data affinity. I write both code and texts.\r\n\r\nBeing a member of the E.ON GenAI Core team, I've been developing Generative AI solutions for different business units within the E.ON family. Multilingual Search, LLMs, Agentic RAG, Knowledge Graphs - a small selection of buzzwords for our daily activities.\r\n\r\nMachines listen to me in SQL, Python, Ruby, AWK, Bash, YAML and (hopefully very soon) Rust. My beloved machines mostly live in the Azure Cloud. \r\n\r\nI am passionate about building technical teams around a goal and having fun crafting solid software.\r\n\r\nYou can talk to me in German, English, Russian, Polish, Ukrainian, Belarussian, Italian, and Spanish. Latin may also be worth trying.\r\n\r\nI still strongly believe in non-dumb statistical approaches to AI. 
The next step will only be possible through a combination of the humanities and science.", "public_name": "Andrei Beliankou", "guid": "b898f203-6222-54a9-8927-5f800a2bdd6b", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/NYYQSC/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/EWZMJK/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/EWZMJK/", "attachments": [{"title": "Don\u2019t call your LLM too often!", "url": "/media/pyconde-pydata-2026/submissions/EWZMJK/resources/PyCon__hRopliS.pdf", "type": "related"}]}, {"guid": "9b43aa38-363f-5bf8-8caa-549a1cbf8b94", "code": "AWMRFD", "id": 87718, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87718-vibe-nlp-for-applied-nlp", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AWMRFD/", "title": "Vibe NLP for Applied NLP", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Talk", "language": "en", "abstract": "One of the hardest parts of applied NLP has always been breaking down complex business problems into machine learning components. It's so hard because it requires domain expertise and reasoning about the specific use case, and it's the one thing technology couldn't fix. But what if we could take some of the learnings from AI-powered coding assistants and apply them to solving real-world NLP problems? In this talk, I'll show how we've built powerful assistants and tools to help developers solve NLP tasks using open-source software, and create modular solutions that are small, fast and fully data-private.", "description": "One of the hardest parts of applied NLP has always been breaking down complex business problems into machine learning components. It's so hard because it requires domain expertise and reasoning about the specific use case, and it's the one thing technology couldn't fix. 
But what if we could take some of the learnings from AI-powered coding assistants and apply them to solving real-world NLP problems? In this talk, I'll show how we've built powerful assistants and tools to help developers solve NLP tasks using open-source software, and create modular solutions that are small, fast and fully data-private.\r\n\r\nAt the core of it is an often overlooked idea: using LLMs to *build systems* instead of *as systems*. AI-powered coding assistants have transformed the way we build software \u2013 and they can be even more impactful for AI development itself and bridge the experience gap that's often holding teams back and causing projects to fail. In the talk, I will show you a new way of using generative models for AI development, and some practical examples of how to make \"Vibe NLP\" work for real-world problems", "recording_license": "", "do_not_record": false, "persons": [{"code": "FZKG9N", "name": "Ines Montani", "avatar": "https://pretalx.com/media/avatars/FZKG9N_7il65fA.webp", "biography": "Ines Montani is a developer specializing in tools for AI and NLP technology. 
She\u2019s the co-founder and CEO of [Explosion](https://explosion.ai) and a core developer of [spaCy](https://spacy.io), a popular open-source library for Natural Language Processing in Python, and [Prodigy](https://prodi.gy), a modern annotation tool for creating training data for machine learning models.", "public_name": "Ines Montani", "guid": "b60e58b3-bd41-534c-a286-22ae8481a00a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/FZKG9N/"}], "links": [{"title": "Slides", "url": "https://speakerdeck.com/inesmontani/vibe-nlp-for-applied-nlp", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AWMRFD/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AWMRFD/", "attachments": []}, {"guid": "6ac2e705-8f06-5692-9a8b-83ebd6afb1ea", "code": "FT7V39", "id": 87676, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87676-from-row-wise-to-columnar-speeding-up-pyspark-udfs-with-arrow-and-polars", "url": "https://pretalx.com/pyconde-pydata-2026/talk/FT7V39/", "title": "From Row-Wise to Columnar: Speeding Up PySpark UDFs with Arrow and Polars", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk (long)", "language": "en", "abstract": "Python UDFs often become the slowest part of PySpark pipelines because they run row-by-row and pay a high cost crossing the JVM\u2194Python boundary. Spark\u2019s Arrow-backed execution changes that cost model by moving data in columnar batches, which can reduce overhead and enable efficient, vectorized processing in Python.\r\n\r\nIn this session, we\u2019ll cover practical patterns for writing Arrow-friendly UDF logic and integrating it with fast Python execution engines that operate on Arrow data. 
We\u2019ll compare common approaches\u2014scalar UDFs, Pandas UDFs, Arrow-native UDFs, and table-shaped Arrow transforms\u2014then translate the results into a decision guide you can apply to production pipelines. Attendees will leave knowing when Arrow helps, when it doesn\u2019t, and how to design UDF-heavy transformations that scale.", "description": "Objective\r\nDemonstrate how to accelerate UDF-heavy PySpark workloads by switching from row-wise execution to Arrow-backed columnar execution, using Polars for fast, maintainable column transformations and table transformations.\r\n\r\nKey Takeways\r\n- How Arrow is being used in PySpark for batched, columnar data exchange\r\n- Why Polars helps: a higher-level DataFrame API plus Arrow interoperability that can often reuse Arrow buffers\r\n- How to design fast column transformations (column in \u2192 column out) and fast table transformations (batch/table in \u2192 batch/table out).\r\n- Benchmarks and tradeoffs across scalar UDFs, Pandas UDFs, Arrow-native UDFs, and Polars-based Arrow table transforms on real-world examples.\r\n\r\nAudience\r\n- Data engineers and data scientists working with PySpark at scale\r\n- Engineers seeking concrete strategies to optimize spark pipelines that rely on Python UDFs\r\n\r\nKnowledge Expected\r\n- Familiarity with PySpark DataFrames and UDFs\r\n- Basic understanding of Spark execution helps but is not required\r\n- Exposure to Polars/Arrow is not required but might be beneficial", "recording_license": "", "do_not_record": false, "persons": [{"code": "CD8CLV", "name": "Aimilios Tsouvelekakis", "avatar": "https://pretalx.com/media/avatars/CD8CLV_gka5c1T.webp", "biography": "Aimilios works as a software engineer for Frontiers Media SA. 
With a passion for solving technical challenges and a commitment to sharing his knowledge in different aspects of computer engineering, including but not limited to ETL pipelines and optimization, improving the in-house tooling, contributing to different architectural decisions, he makes a valuable contribution to his team's objectives. Prior to joining Frontiers, he gained experience working as a Devops engineer at CERN, where he actively contributed in projects related to cloud computing and disaster recovery, automation, observability and databases. He holds a MEng in Electrical and Computer Engineering from National Technical University of Athens.", "public_name": "Aimilios Tsouvelekakis", "guid": "e07c3a5d-216e-5b40-9103-556ec8b714c9", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/CD8CLV/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/FT7V39/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/FT7V39/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/FT7V39/resources/Optimi_TAJbepJ.pdf", "type": "related"}]}, {"guid": "419a2c60-15a0-53a4-ba5f-889f377eac9a", "code": "AAY8KQ", "id": 87705, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87705-simplicity-scales-rewriting-to-a-django-monolith-and-monorepo", "url": "https://pretalx.com/pyconde-pydata-2026/talk/AAY8KQ/", "title": "Simplicity Scales: Rewriting to a Django Monolith and Monorepo", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Simplicity scales better than complexity. 
In this talk we share what we learned from a year-long refactor of our Python-based infrastructure where we majorly improved developer velocity and overall developer happiness with two choices: moving everything into a monorepo and replacing our microservices architecture with a Django monolith. \r\nInstead of going deep on any single technology, we offer a holistic view of how these decisions enabled a multi-disciplinary team to move faster on a shared codebase. We'll introduce a blueprint for a uv-based Python monorepo, discuss why we chose \"boring\" tools over custom solutions, and share the metrics we used to measure success. The metrics dashboard will be open-sourced as part of this talk.", "description": "When working on older codebases most developers encounter the question of \"Should we fix this or rewrite it completely?\". Weeklong releases, multi-day bug hunts and a very obvious impact of tech debt on developer happiness and development velocity led us to ask that question in a very general way. We were wondering how a better approach to a Python-based infrastructure could look when working in a multi-disciplinary startup environment. We believe that many developers have been in a similar situation and we would like to introduce our holistic take on when and how to refactor older codebases.\r\n\r\nOur solution to these problems consists of two main changes: moving the entire code of various teams into a uv-based monorepo and questioning a lot of technical decisions of the past under the paradigm of \"Can this be done simpler or can we rephrase the problem to solve it with an existing technical solution?\".  We will share our insights into how a multi-language monorepo approach can work at a startup where full-stack and ML practitioners work on the same code base. This includes going over standardized procedures (e.g. 
code quality) that are shared between all teams.\r\n\r\nFurthermore we will discuss some of the high-level decisions and introduce our reasoning within the available options of the python ecosystem. This includes for example why for us a monolithic approach based on Django works better than a Flask based microservice solution.\r\n\r\nThe goal of this talk is to give the listener an introduction to our solutions and make it easy to draw parallels to their own situation or problems. Attendees will leave with an insight into our decision framework and we will show what metrics we used to validate the success of our refactoring and technical choices.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LCXWWK", "name": "Bruno Vollmer", "avatar": "https://pretalx.com/media/avatars/LCXWWK_CvfblVY.webp", "biography": "Hey I\u2019m Bruno, I\u2019m an experienced Senior Software Engineer passionate about building innovative solutions in fast-paced and evolving startup environments.\r\n\r\nI consider myself a generalist when it comes to software engineering and I enjoy working at the intersection of engineering leadership and software development. Currently I\u2019m working as a team lead in a startup in Lausanne. My team is mainly responsible for building and scaling the main platform of our solution. 
Next to this I\u2019m responsible for the overall software architecture of our solution.", "public_name": "Bruno Vollmer", "guid": "c769eb13-4929-576b-b6be-b2f753874e45", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LCXWWK/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/AAY8KQ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/AAY8KQ/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/AAY8KQ/resources/PyCon__klACDOm.pdf", "type": "related"}]}, {"guid": "5ecb39a8-8d60-5e66-8dfc-d09920766040", "code": "LVJXK3", "id": 85791, "logo": null, "date": "2026-04-16T14:00:00+02:00", "start": "14:00", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-85791-using-sensor-fusion-and-ml-to-navigate-underground-when-gps-fails", "url": "https://pretalx.com/pyconde-pydata-2026/talk/LVJXK3/", "title": "Using Sensor Fusion and ML to Navigate Underground When GPS Fails", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "In the twisting vaults of a subway, metro, or U-Bahn, there\u2019s often no reliable cell service, wifi, or GPS. Which means riders had no good way of keeping track of their stops or ETA when underground.\r\nAfter collecting extensive ground truth data, we trained a motion classifier using the phone's accelerometer to identify a moving train. This prediction is fed into a location model that combines it with the train schedule to estimate a location, even when GPS fails. We cover our unique data pipeline, feature engineering, and the optimization for high-scale, offline edge deployment to millions of users.", "description": "In the twisting vaults of a subway, metro, or U-Bahn, there\u2019s often no reliable cell service, wifi, or GPS. 
Which means riders had no good way of keeping track of their stops or ETA when underground.\r\nAfter collecting extensive ground truth data, we trained a motion classifier using the phone's accelerometer to identify a moving train. This prediction is fed into a location model that combines it with the train schedule to estimate a location, even when GPS fails. We cover our unique data pipeline, feature engineering, and the optimization for high-scale, offline edge deployment to millions of users.\r\n\r\nAttendees will gain from the lessons learned developing a sensor fusion ML system for offline use in smartphones\r\n\r\n##### Data Collection & Annotation\r\nStrategies for gathering high-quality, labeled \"ground truth\", especially in cases where the labels can't be inferred by human annotators after the fact\r\n\r\n##### The ML Pipeline\r\nHyperparameter tuning of a convolutional neural network (CNN)\r\nBuilding a multi-stage training regimen, to leverage different datasets\r\n\r\n##### UX\r\nPresenting predictions to users in a way that expresses uncertainty when necessary, and inspires confidence when justified. We want users to forget GPS doesn't work underground.", "recording_license": "", "do_not_record": false, "persons": [{"code": "QXFCVA", "name": "\u00c9tienne Tremblay", "avatar": "https://pretalx.com/media/avatars/QXFCVA_o497y4c.webp", "biography": "Hi, I'm \u00c9tienne! I am responsible for developing machine learning solutions to rider problems, and turning them into features that help Transit users in their journeys. Before working at Transit, I was building databases and analysis tools for the aerospace industry. I have also been involved in the mobility professorship at Polytechnique Montr\u00e9al. I hold a B. Eng. 
in Aerospace from Polytechnique Montr\u00e9al.", "public_name": "\u00c9tienne Tremblay", "guid": "c6e631a7-b33b-5eac-a18b-b5f4574c8d3a", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/QXFCVA/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/LVJXK3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/LVJXK3/", "attachments": []}, {"guid": "22a679fa-3f0e-5c7d-bf72-e06d61f1c6a6", "code": "3C9P9V", "id": 87172, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87172-is-my-ai-recruiting-biased-how-to-evaluate-these-systems", "url": "https://pretalx.com/pyconde-pydata-2026/talk/3C9P9V/", "title": "Is my AI Recruiting biased? - How to evaluate these systems", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Talk", "language": "en", "abstract": "AI recruiting systems are increasingly used to filter, rank, and select applicants at scale. Yet their deployment raises essential questions: How reliable are these models in real hiring environments, and how do we ensure fairness and safety across diverse applicant profiles? This talk presents a structured approach to testing and validating AI-driven recruiting pipelines. It highlights the role of synthetic test data, data augmentation, and fairness metrics in uncovering systemic risks and mitigating bias. Attendees will walk through a complete evaluation workflow. The session also incorporates insights from real-world testing practices, demonstrating how rigorous validation can increase trust and transparency in recruitment AI.", "description": "AI recruiting systems are rapidly reshaping talent acquisition by automating candidate filtering, ranking, and selection. However, their growing influence raises critical concerns around fairness, robustness, and decision transparency. 
This talk introduces a practical testing methodology for evaluating AI recruiting pipelines beyond traditional accuracy metrics.\r\n\r\nWe will examine how synthetic data and augmentation techniques can expose hidden weaknesses, improve coverage, and stress-test edge cases. The talk will address the role of proxy variables, why they matter, and how they can help uncover unintended model behavior. We will also explore fairness measurement strategies, including individual and group fairness metrics, and discuss how these approaches reveal structural bias in ranking and scoring outcomes.\r\n\r\nBecause parts of the evaluation process can be automated, the session will demonstrate how Python-based agents and LLM \u201creferees\u201d can assist in generating and augmenting CVs and certificates, validating predictions, and assessing explanation quality. This automation can accelerate workflows, increase reproducibility, and reduce human error.\r\n\r\nParticipants will walk through a complete testing pipeline, supported by insights from real-world projects that illustrate how different tools and strategies expose systemic risks and guide mitigation. Attendees will leave with practical techniques to make recruiting systems more reliable, transparent, and trustworthy in real deployment contexts.", "recording_license": "", "do_not_record": false, "persons": [{"code": "SPBCU3", "name": "Sebastian Krauss", "avatar": "https://pretalx.com/media/avatars/SPBCU3_y9Q2ar2.webp", "biography": "My name is Sebastian and I work as an AI Test Engineer at Validaitor. With a background in Mechatronics and Autonomous Systems, and hands-on experience at Bosch, Fraunhofer, and in international research settings, I focus on the intersection of AI trustworthiness and real-world deployment. My current work involves developing methods to test AI models for vulnerabilities, safety risks, and secure behavior - ensuring AI systems perform reliably and ethically. 
I like to share my experience with other techies all around the world. When I don't look into a screen, I like bouldering and books. :)", "public_name": "Sebastian Krauss", "guid": "846a3913-f474-51d7-ac48-a64ce79112aa", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/SPBCU3/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/3C9P9V/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/3C9P9V/", "attachments": [{"title": "Presentation", "url": "/media/pyconde-pydata-2026/submissions/3C9P9V/resources/Pycon2_nEKiKku.pdf", "type": "related"}]}, {"guid": "5b6751f5-9333-5e03-a9c3-65ff41db1879", "code": "NN7CVP", "id": 87714, "logo": null, "date": "2026-04-16T15:45:00+02:00", "start": "15:45", "duration": "00:30", "room": "Europium [3rd Floor]", "slug": "pyconde-pydata-2026-87714-post-processing-and-visualization-of-astrophysical-data-with-pypluto", "url": "https://pretalx.com/pyconde-pydata-2026/talk/NN7CVP/", "title": "Post-Processing and Visualization of Astrophysical Data with PyPLUTO", "subtitle": "", "track": "PyData: Visualisation & Notebooks", "type": "Talk", "language": "en", "abstract": "Modern scientific workflows increasingly rely on interactive analysis, reproducibility, and high-quality visualisation. **PyPLUTO** is a Python package designed to explore, analyse, and visualise numerical simulations produced by the **PLUTO** code for computational astrophysics. This talk shows how *PyPLUTO* leverages the Python ecosystem to transform raw simulation outputs into clear, flexible analysis and visualization workflows.\r\n\r\nThe session demonstrates how domain-specific simulation data can be integrated with tools such as `NumPy` and `Matplotlib` to support efficient post-processing, rapid exploration, and production of publication-quality figures. 
Attendees will see how structured Python workflows can replace fragmented, ad-hoc scripts, how visualisation accelerates scientific insight, and how Python lowers the barrier between simulation output and interpretation.\r\n\r\nAlthough examples are drawn from computational astrophysics, the approach is broadly applicable to any field working with structured simulation data. The talk highlights how lightweight, Python-based post-processing tools can improve clarity, reproducibility, and productivity without imposing heavy frameworks or tightly coupled visualisation pipelines.", "description": "Numerical simulations often generate vast amounts of structured data; yet, extracting insights from these outputs remains a major challenge. Analysis is frequently performed through fragmented, ad-hoc scripts that are difficult to maintain, reuse, or reproduce. **PyPLUTO** is a Python package designed to address this gap by providing a clear and flexible interface for post-processing, analyzing, and visualizing simulation data produced by the **PLUTO** code for computational astrophysics.\r\n\r\nThis talk presents **PyPLUTO** as a case study in building lightweight, domain-specific scientific tools on top of the Python scientific ecosystem. The emphasis is on offline analysis and visualisation workflows that operate on completed simulation outputs, enabling efficient exploration, comparison, and communication of results. Rather than coupling visualisation to simulation runtime, **PyPLUTO** focuses on clarity, composability, and integration with established PyData libraries.\r\n\r\nThrough concrete examples, the session demonstrates how structured simulation data can be processed and visualised using tools such as NumPy and Matplotlib. 
Attendees will learn how Python-based workflows can replace scattered analysis scripts, how visualization supports rapid scientific insight, and how a clean separation between simulation and analysis enhances reproducibility and productivity.\r\n\r\n## Outline\r\n\r\n#### 1. **From Simulation Output to Insight**\r\n- Common challenges in post-processing large numerical simulations\r\n- The gap between raw data and scientific interpretation\r\n- Why offline analysis and visualisation remain essential\r\n\r\n#### 2. **PyPLUTO: Scope and Design**\r\n- What PyPLUTO does and the problems it targets\r\n- Design goals: simplicity, flexibility, and interoperability\r\n- Clear separation between simulation execution and analysis\r\n\r\n#### 3. **Working with Simulation Data**\r\n- Loading and organising structured simulation outputs\r\n- Handling scalar and vector fields across space and time\r\n- Typical post-processing tasks and analysis patterns\r\n\r\n#### 4. **Visualisation Workflows**\r\n- Exploratory plots and diagnostic views\r\n- Time evolution and comparison between simulations\r\n- Producing publication-quality figures with Matplotlib\r\n\r\n#### 5. **Interactive GUI for Post-Processing**\r\n- Lightweight graphical interfaces for exploring simulation data\r\n- Interactive selection of fields, slices, and time steps\r\n- GUI as a complement to scripting, not a replacement\r\n\r\n#### 6. **Integration with the Python Ecosystem**\r\n- Efficient data handling with NumPy\r\n- Interoperability with existing scientific Python tools\r\n- Benefits of building on established libraries\r\n\r\n#### 7. **Software Design Lessons**\r\n- Building user-friendly scientific APIs\r\n- Balancing usability, transparency, and performance\r\n\r\n#### 8. 
**Broader Applicability and Outlook**\r\n- Relevance to other simulation-heavy fields\r\n- Reusable patterns for Python-based post-processing\r\n- Future directions and potential extensions\r\n\r\nThe talk is aimed at scientists, data practitioners, and Python developers interested in scientific visualisation and simulation data analysis. No background in astrophysics or PLUTO is required; the focus is on workflows, tools, and design principles applicable across the PyData community.", "recording_license": "", "do_not_record": false, "persons": [{"code": "YSUFD3", "name": "Giancarlo Mattia", "avatar": "https://pretalx.com/media/avatars/YSUFD3_rDCUXCc.webp", "biography": "I am a postdoctoral researcher at the Max Planck Institute for Astronomy in Heidelberg. I am currently investigating the impact of non-ideal processes within protostellar and protoplanetary disks on their formation, evolution, and production of winds and collimated outflows.", "public_name": "Giancarlo Mattia", "guid": "6e6baddf-833b-5f52-995a-46de1cd6e8d5", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/YSUFD3/"}], "links": [{"title": "Presentation (made with google slides)", "url": "https://docs.google.com/presentation/d/1XNoe_t-PG9tju9ib1VoB33cGj_IJZP8_SHU6lm4ZIRo/edit?usp=sharing", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/NN7CVP/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/NN7CVP/", "attachments": []}], "Palladium [2nd Floor]": [{"guid": "61618f98-d745-57f4-9538-f675d9eaf24f", "code": "Q9HMT3", "id": 87168, "logo": null, "date": "2026-04-16T10:55:00+02:00", "start": "10:55", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87168-schema-driven-lambdaliths-in-python-with-aws-lambda-powertools-and-pydantic", "url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9HMT3/", "title": "Schema-Driven Lambdaliths in Python with AWS Lambda Powertools and Pydantic", "subtitle": "", "track": 
"PyCon: Django & Web", "type": "Talk", "language": "en", "abstract": "Modern web frameworks such as Hono have renewed interest in schema-driven development and the \u201cLambdalith\u201d architecture, where an application is delivered as a single AWS Lambda function. While this model provides a predictable developer experience, Python-based serverless systems often struggle to achieve the same consistency, validation, and maintainability in production.\r\n\r\nDeploying Python web frameworks to AWS Lambda frequently requires additional execution layers\u2014such as ASGI adapters or container-based runtimes\u2014which add complexity and blur data boundaries. For teams that prefer clear, minimal Lambda handlers, these abstractions can hinder both development and operations.\r\n\r\nThis session shares production-proven patterns for building schema-driven Lambdalith applications in Python using AWS Lambda Powertools and Pydantic, without relying on heavy framework abstractions. Through real-world examples, we show how these tools simplify handler logic, standardize request and response validation, and improve observability and error handling.\r\n\r\nAttendees will leave with practical techniques for building reliable and maintainable Python Lambdalith systems, and insights they can immediately apply to modernizing existing serverless codebases or delivering new production services with confidence.", "description": "The rise of modern web frameworks such as Hono has brought increased attention to schema-driven development and the \u201cLambdalith\u201d architecture, where an application is delivered through a single Lambda function. These approaches offer a highly streamlined developer experience, but many existing Python-based systems struggle to achieve the same level of consistency, validation, and maintainability.\r\n\r\nIn Python, closing this gap often means introducing additional execution layers outside the language itself. 
When frameworks designed around web servers and request lifecycles are deployed on AWS Lambda, they typically require ASGI adapters, web adapters, or container-based runtimes. While powerful, these layers can make it harder to focus on what many teams actually want: writing clear, minimal Python handlers with explicit data boundaries.\r\n\r\nThis talk explores how combining AWS Lambda Powertools and Pydantic can close that gap and enable a modern, predictable development workflow\u2014even in established Python ecosystems. Drawing from real-world product use cases, we will examine how these tools can simplify handler-level logic, standardize request and response validation, and improve observability and error handling.\r\n\r\nLambda Powertools provides far more than logging and metrics: it includes utilities for structured tracing, data parsing, idempotency, typed configuration, and other features that bring Python serverless development closer to the ergonomics of newer frameworks. When paired with Pydantic, developers can enforce clear data contracts, reduce boilerplate, and achieve stronger guarantees around application behavior.\r\n\r\nAttendees will learn practical patterns for improving quality and productivity in Lambda-based applications, including how to:\r\n\r\n- Validate event payloads and responses using Pydantic models\r\n- Implement consistent error handling strategies\r\n- Structure a Lambdalith-style architecture in Python\r\n- Leverage Powertools utilities to enhance reliability and developer experience\r\n\r\nThis session will be valuable for Python developers who want to apply schema-driven design principles, modernize existing serverless codebases, or build more maintainable Lambda applications with confidence.", "recording_license": "", "do_not_record": false, "persons": [{"code": "SF3WYV", "name": "Tanio Toranosuke", "avatar": "https://pretalx.com/media/avatars/SF3WYV_g8lreHU.webp", "biography": "IT engineer at DAIKIN INDUSTRIES, LTD. 
(Japan), working across the full stack\u2014infrastructure, frontend, and backend development. Primarily writes Python and TypeScript.\r\nCurrently building energy optimization tools that analyze HVAC system data to generate operational improvement proposals. \r\nWorks in a Scrum-based team environment and has experience contributing to open source projects.", "public_name": "Tanio Toranosuke", "guid": "ae740299-f470-5190-8c0a-e2ab69592e4d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/SF3WYV/"}, {"code": "N9LBTH", "name": "Haruto Mori", "avatar": "https://pretalx.com/media/avatars/N9LBTH_yiGd7yy.webp", "biography": "IT engineer at DAIKIN INDUSTRIES, LTD. (Japan), working across the full stack\u2014infrastructure, frontend, and backend development. Primarily writes Python and TypeScript.\r\nCurrently building energy optimization tools that analyze HVAC system data to generate operational improvement proposals. \r\nWorks in a Scrum-based team environment and has experience contributing to open source projects. 
\r\nIn personal development, enjoys experimenting with Cloudflare Workers for serverless applications.", "public_name": "Haruto Mori", "guid": "17f3da15-bb70-57fc-8426-5b4f20194130", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/N9LBTH/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9HMT3/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/Q9HMT3/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/Q9HMT3/resources/pycond_at83s9G.pdf", "type": "related"}]}, {"guid": "973ce8b5-f426-58bd-95ac-3363fe5ec109", "code": "GAUNKM", "id": 87201, "logo": null, "date": "2026-04-16T11:35:00+02:00", "start": "11:35", "duration": "00:45", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87201-when-llms-are-too-big-building-cost-efficient-high-throughput-ml-systems-for-e-commerce-cataloging", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GAUNKM/", "title": "When LLMs Are Too Big: Building Cost-Efficient High-Throughput ML Systems for E-Commerce Cataloging", "subtitle": "", "track": "PyCon: MLOps & DevOps", "type": "Talk (long)", "language": "en", "abstract": "E-commerce cataloging at idealo operates at extreme scale: 4.5 billion offers from 50,000+ shops across six countries, with peak ingestion rates of 4.8 million offers per minute. While large language models (LLMs) provide strong classification accuracy, they are too slow and costly for billion-scale real-time processing. This talk shows how idealo builds a cost-efficient, high-throughput machine learning system that leverages LLM knowledge without deploying full models in production. \r\n\r\nWe present how knowledge distillation from a large e5 instruction model enables a compact multilingual MiniLM encoder to achieve high accuracy, and how optimized inference runtimes and specialized hardware such as AWS Neuron help meet strict latency and cost requirements. 
Beyond modeling, we highlight key operational challenges: constructing training datasets from massively imbalanced data, selecting the right encoder architecture from today\u2019s model landscape, and designing a robust MLOps lifecycle with automated data sampling, training, deployment, and monitoring. \r\n\r\nAttendees will learn practical techniques for scaling ML systems under real-world constraints, how to extract value from LLMs when they are too large to serve directly, and how to transition research prototypes into reliable, high-volume production pipelines.", "description": "## When LLMs Are Too Big: Building Cost-Efficient High-Throughput Machine Learning for Cataloging in E-Commerce\r\n\r\n \r\n\r\nidealo.de offers a price comparison service for over 5.7 million products from a wide variety of over thousands of categories. It navigates a dynamic, constantly changing billion-scale landscape with **over 2 billion offers from 50,000+ shops in 6 countries**. Our central challenge is cataloging this huge amount of offers automatically at scale, with a peak throughput of **processing 4.8 million offers per minute.** \r\n\r\n \r\n\r\nWhile modern large language models (LLMs) excel in such tasks, they do not scale well to huge amounts of data. To fulfill business needs, we need to strike a balance between processing speed and offer cataloging quality. By employing modern machine learning techniques to extract specialist knowledge from downscaled state-of-the-art LLMs and a multitude of performance enhancing techniques we speed up idealo\u2019s processing while massively improving cataloging performance. This talk presents how these solutions find the balance between cost and performance and how they integrate into idealo\u2019s offer cataloging pipelines. \r\n\r\n  \r\n\r\n \r\n\r\n### What makes this approach unique?\r\n\r\nOur solution and practical experiences in the area of high-throughput classification are presented. 
This includes the operational aspects of our system, in particular the design of a stable and high-performance MLOps lifecycle integrated into our CI/CD and continuous Training pipelines. Where we automate continuous data sampling, model training, model deployments, and monitoring. \r\n\r\nConcrete solutions and best practices are discussed that demonstrate how our model accuracy of the multilingual MiniLM transformer encoder model is improved through knowledge distillation by a large e5 instruction transformer. Additionally, we show how the integration of these models on specialized hardware like AWS Neuron enables strict runtime and latency requirements to be met in a cost-efficient manner. \r\n\r\nIn detail we will discuss the following topics: \r\n\r\n* Machine Learning Operation Lifecycle for a high-throughput category classification system. \r\n* Challenges when creating training and testing datasets from the huge amount of existing massively unbalanced data efficiently. \r\n* Selecting the right model in presence of the current encoder language model zoo. \r\n* Using knowledge distillation via student-teacher models to balance required compute and classification performance. \r\n* Integrating quantization techniques for speed improvements. \r\n* Selecting ideal compute instances for our production environment. \r\n* How to compile the model on custom designed machine learning accelerators using the neuron package. \r\n\r\n \r\n\r\n \r\n\r\n### Key takeaways for attendees:\r\n\r\n* An overview of months of research and exploration for massive throughput environments including their practical integration in live systems. \r\n* Modern machine learning systems in production, especially with billion-scale data, need to carefully balance business needs in terms of cost and quality. \r\n* State-of-the-art LLMs are often not feasible for large-scale tasks. However, new machine learning techniques can extract their knowledge for specific applications. 
\r\n* How to transition research findings to production. \r\n\r\n \r\n\r\n \r\n\r\nThe talk will be aligned along our tech stack, which includes PyTorch, PyTorch Lightning, Huggingface, AWS Sagemaker, AWS Neuron SDK, Grafana Loki, Docker and GitHub Actions.", "recording_license": "", "do_not_record": false, "persons": [{"code": "UXME8R", "name": "Tobias Senst", "avatar": "https://pretalx.com/media/avatars/UXME8R_zNuS6tq.webp", "biography": "Tobias Senst is a Senior Machine Learning Engineer at idealo internet GmbH. He received his PhD from Technische Universit\u00e4t Berlin and has more than 10 years of experience in computer vision and video analytics research.\r\n\r\nAt idealo, he transitioned from working with images and videos to natural language processing. For the past five years, he has worked on high-performance machine learning models and automated continuous training, development, and integration (CI/CD/CT) pipelines in the field of cataloging. His professional focus is on MLOps, ML testing, and research.", "public_name": "Tobias Senst", "guid": "e4d322ca-e6b4-5640-9a83-4494d66ba14e", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UXME8R/"}, {"code": "LGQXA9", "name": "Bastian Wandt", "avatar": "https://pretalx.com/media/avatars/LGQXA9_gRN1hXU.webp", "biography": "Bastian is a Senior Machine Learning Research Engineer at idealo Internet GmbH, where he focuses on large-scale offer cataloging and high-throughput machine learning systems. 
Before joining idealo in 2025, he was an Assistant Professor at Link\u00f6ping University in Sweden, leading a research group in 3D computer vision.\r\n\r\nHe completed his PhD in 2020 at Leibniz University Hannover with a thesis on 3D human pose estimation and subsequently spent two years at the University of British Columbia in Canada as a PostDoc, expanding his research into broader areas of 3D computer vision and teaching related courses.", "public_name": "Bastian Wandt", "guid": "1708158b-0caa-5569-baea-ddebafb67462", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LGQXA9/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GAUNKM/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GAUNKM/", "attachments": [{"title": "Presentation Slides", "url": "/media/pyconde-pydata-2026/submissions/GAUNKM/resources/PyConD_hRbtkri.pdf", "type": "related"}]}, {"guid": "3d9698d0-4183-5c6d-b088-19b541dbfafb", "code": "NDZSSB", "id": 88271, "logo": null, "date": "2026-04-16T13:20:00+02:00", "start": "13:20", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-88271-free-t-h-r-e-ading-a-trading-systems-journey-beyond-the-gil", "url": "https://pretalx.com/pyconde-pydata-2026/talk/NDZSSB/", "title": "Free T(h)r(e)ading: A Trading Systems Journey Beyond the GIL", "subtitle": "", "track": "PyCon: Programming & Software Engineering & Testing", "type": "Talk", "language": "en", "abstract": "Python 3.13's free-threaded mode opens new territory for Python concurrency. We embarked on an experiment: could a trading algorithm benefit from true parallelism, and what would it take to get there? This talk documents our research journey from async/await to free threading\u2014the hypotheses we tested, the benchmarks we designed, the unexpected behaviours we discovered, and the systematic approach we took to validating whether GIL-free Python could handle real-time market data. 
You'll see our experimental methodology, the data we collected, surprising findings about thread scheduling and memory patterns, and what our results suggest about Python's concurrent future.", "description": "The release of Python 3.13 with experimental free-threaded mode (PEP 703) represents a fundamental shift in Python's concurrency model. For decades, the Global Interpreter Lock has dictated how we write concurrent Python code, pushing developers toward async/await patterns for I/O-bound workloads and multiprocessing for CPU-bound tasks. But what happens when we remove that constraint?\r\n\r\nWe designed a research experiment to answer this question empirically: take a production trading algorithm built on asyncio, migrate it to free threading, and measure everything. Trading systems make ideal subjects for this research\u2014they're latency-sensitive, handle multiple concurrent data streams, perform both I/O and CPU-bound operations, and have clear, quantifiable performance metrics.\r\n\r\nThis talk presents our complete research journey, from initial hypothesis to validated conclusions, sharing both our methodology and findings.\r\n\r\nDetailed Outline:\r\n1. Research Question & Motivation (3 minutes)\r\n\r\nThe research question: can a trading algorithm benefit from true parallelism?\r\nWhy trading systems make ideal experimental subjects\r\nInitial hypotheses about performance characteristics\r\nBaseline system: async architecture and performance profile\r\n\r\n2. Experimental Design (4 minutes)\r\n\r\nMigration approach\r\nBenchmarking framework\r\nWorkload simulation\r\nControl variables and isolation of I/O vs. CPU-bound operations\r\n\r\n3. Migration Journey (5 minutes)\r\n\r\nArchitectural transformation\r\nKey refactoring patterns and synchronization strategies\r\nThread safety challenges\r\nLibrary ecosystem compatibility findings\r\n\r\n4. 
Results & Discoveries (8 minutes)\r\n\r\nPerformance data: latency, throughput, and resource utilization\r\nWorkload analysis: where free threading won, where async remained competitive\r\nVisual data presentation: charts and comparative analysis\r\n\r\n5. Practical Implications (4 minutes)\r\n\r\nDecision framework: when to choose free threading over async\r\nMigration best practices and lessons learned\r\nProduction readiness assessment\r\nWhat this means for Python's concurrent future\r\n\r\n6. Q&A (5 minutes)\r\n\r\nPrerequisites - This talk assumes attendees have:\r\n\r\n- Strong understanding of Python's concurrency models (asyncio, threading, multiprocessing)\r\n- Familiarity with the GIL and its implications\r\n- Basic understanding of systems programming concepts (thread safety, synchronization)", "recording_license": "", "do_not_record": false, "persons": [{"code": "BPNG3K", "name": "Tim Kreitner", "avatar": null, "biography": "Tim Kreitner is a Senior Software Engineer at Vattenfall Energy Trading GmbH in Hamburg, Germany. With a background in Mechanical and Computational Engineering, Tim transitioned into the field of finance, leveraging various programming languages.\r\n\r\nAt Vattenfall, Tim develops Algorithmic Trading Infrastructure applications. 
Including Order Routers, Market Data Servers, Exchange connections.", "public_name": "Tim Kreitner", "guid": "ed537f5f-3e94-593f-855e-0198bc446041", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BPNG3K/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/NDZSSB/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/NDZSSB/", "attachments": []}, {"guid": "f130a313-9451-5714-aa08-748f85b339e0", "code": "VUHSG9", "id": 87315, "logo": null, "date": "2026-04-16T14:00:00+02:00", "start": "14:00", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-87315-letting-ai-move-robotics-demos-powered-by-python", "url": "https://pretalx.com/pyconde-pydata-2026/talk/VUHSG9/", "title": "Letting AI Move: Robotics Demos Powered by Python", "subtitle": "", "track": "PyCon: Embedded Systems & Robotics", "type": "Talk", "language": "en", "abstract": "AI is sometimes hard to explain, especially for people outside of tech. With robots, AI becomes visible and tangible. In this talk we want to show how we can use Python and the huggingface reachy mini as an example to make AI more concrete, interactive, and engaging for beginners and non-experts.", "description": "Artificial intelligence can be difficult to explain, especially to people outside of tech. We often rely on slides, diagrams, or on-screen demos, but the real impact does not always stick. For people encountering AI for the first time\u2014such as students or non-technical audiences\u2014AI terminology and concepts can remain abstract and disconnected from real-world experience.\r\n\r\nRobots can help change that. When AI controls a physical system, its behavior becomes visible, tangible, and easier to reason about. In this talk, we explore how Python and playful robotics experiments can be used to make AI more concrete, interactive, and engaging. 
Using the Hugging Face Reachy Mini robot as a case study, we show how physical interaction can turn abstract AI concepts into intuitive, memorable experiences.\r\n\r\nThe perspective of this talk is intentionally non-traditional: we started with no prior knowledge of robotics or mechanics and approached the problem purely from a Python developer\u2019s point of view. This journey strongly shapes the talk. Rather than focusing on advanced robotics engineering, the emphasis is on accessibility, experimentation, and learning by doing. The goal is to show that robotics can be an approachable medium for explaining AI, even for people without a hardware or engineering background.\r\n\r\nDuring the talk, we walk through basic building blocks such as movement, gestures, and simple interaction patterns, and show how AI-driven behavior can be layered on top of them using familiar Python tools. We share examples from real experiments and demos, including what worked well, what failed, and what we learned from unexpected behavior in live settings.\r\n\r\nImportantly, this is not a product demo or a hardware-specific tutorial. While Reachy Mini is used as a concrete example, the focus is on transferable ideas and design patterns:\r\n\r\nHow physical interaction changes the way people perceive AI\r\n\r\nHow Python lowers the barrier to experimenting with robotics\r\n\r\nHow to design demos that invite curiosity rather than intimidation\r\n\r\nHow to make AI systems easier to explain in educational and outreach contexts\r\n\r\nAttendees do not need access to a robot to benefit from this talk. The lessons and patterns discussed can be applied to a wide range of settings, including classrooms, workshops, meetups, and public demonstrations.\r\n\r\nThis talk is aimed at Python beginners and intermediate developers, especially educators and anyone who regularly needs to explain or demonstrate AI to others. 
Attendees will leave with new ideas, inspiration, and practical approaches for making AI more tangible, engaging, and human-centered.", "recording_license": "", "do_not_record": false, "persons": [{"code": "HVSNHD", "name": "Larissa Haas", "avatar": "https://pretalx.com/media/avatars/HVSNHD_WORK9QI.webp", "biography": "I'm Squad Lead for Automation & Analytics, coordinating Process Automation projects and drafting solutions for intelligent enterprises. Within projects, I work as a Senior Data Scientist and Cloud Solution Architect, combining various BTP services with Artificial Intelligence. \r\nIf you like to chat about Artificial Intelligence, Science Fiction, bots gone rogue and seeking for world domination, or Roundnet you're more than welcome to contact me!", "public_name": "Larissa Haas", "guid": "8076675d-8b92-5d5f-a648-f14bdf8a61d8", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HVSNHD/"}, {"code": "HPJTAP", "name": "Annika Herbert", "avatar": null, "biography": "Annika Herbert is a Solution Architect in the AI & Data Unit at sovanta, working on data-driven and AI-powered solutions. With a background in Data Science, she enjoys making AI more approachable, tangible, and easy to explain, especially to non-technical audiences. Coming from a Python developer\u2019s perspective, she likes to explore new fields through hands-on experimentation and learning by doing. 
Outside of work, she enjoys dancing, concerts, and baking chocolate-filled treats.", "public_name": "Annika Herbert", "guid": "f80c8728-51ec-58f5-a5a1-9f3d51fa0d3d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/HPJTAP/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/VUHSG9/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/VUHSG9/", "attachments": [{"title": "Talk Slides", "url": "/media/pyconde-pydata-2026/submissions/VUHSG9/resources/2026-0_GeQRBwB.pdf", "type": "related"}]}, {"guid": "87887c8c-aa32-52fe-9c54-69066d66424e", "code": "QMBEZX", "id": 86856, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-86856-is-digital-sovereignty-a-new-buzzword-in-ai-development", "url": "https://pretalx.com/pyconde-pydata-2026/talk/QMBEZX/", "title": "Is digital sovereignty a new buzzword in AI development?", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Talk", "language": "en", "abstract": "AI development usually focuses on feasibility and implementation, but a new buzzword is now being used: 'sovereignty'. While customers are excited about it, what does it mean for them and for AI developers? In this presentation, we analyse different aspects of sovereignty and explore how it can be used to build trustworthy AI solutions. \r\nWe will also discuss current examples from politics and development to identify the best practices for secure data processing.", "description": "AI development typically prioritises feasibility and implementation. While solutions should be efficient, high-performing and scalable, sovereignty and data security are often overlooked. These issues tend to be overlooked when solutions are being found, even though we don't use AI as an end in itself, but rather to benefit or support our customers. Customers operate within a regulatory framework and rely on responsible technology. 
\r\nRather than seeing regulation as a hindrance, we should view it as an opportunity to drive innovation and create sustainable, trustworthy solutions. However, this is only possible if we understand the full meaning of sovereignty. \r\nThis presentation will explore the various aspects of the term 'sovereignty' and its potential impact on AI projects. We will discuss current examples from politics and development to identify best practices for secure data processing.", "recording_license": "", "do_not_record": false, "persons": [{"code": "LT7LHM", "name": "Dr. Maria B\u00f6rner", "avatar": "https://pretalx.com/media/avatars/LT7LHM_3wlWMvu.webp", "biography": "Dr. Maria B\u00f6rner holds a Ph.D. in physics from CERN and DESY and is an expert in the field of AI. She is the head of the AI Competence Center at Westernacher Solutions. In this position, she is responsible for developing AI tools for government, church, and justice organizations. She strengthens the company's internal AI competences and promotes them externally. She is also the deputy chairwoman of the Legal Tech working group at Bitkom and the German ambassador of the Women in AI network. Maria has worked in the field of AI for over eight years, focusing on responsible and ethical AI.", "public_name": "Dr. 
Maria B\u00f6rner", "guid": "71e27123-f3ab-5a5a-b328-ca90ab90adff", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/LT7LHM/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/QMBEZX/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/QMBEZX/", "attachments": []}, {"guid": "3ec044c5-f4b3-5922-ae5d-5265cf156c98", "code": "BRRHGY", "id": 88397, "logo": null, "date": "2026-04-16T15:45:00+02:00", "start": "15:45", "duration": "00:30", "room": "Palladium [2nd Floor]", "slug": "pyconde-pydata-2026-88397-on-interventional-generalisation", "url": "https://pretalx.com/pyconde-pydata-2026/talk/BRRHGY/", "title": "On Interventional Generalisation", "subtitle": "", "track": "PyData: Machine Learning & Deep Learning & Statistics", "type": "Talk", "language": "en", "abstract": "If I do X instead of Y, will I get the outcome I want? What about in a new unseen situation? Making predictions alone is pointless, one wants to act in the world. Furthermore one must act in situations that are similar but different to all past experience. The real underlying goal of all decision making is really interventional generalisation: the ability to evaluate hypothetical choices in new unseen situations. Unfortunately data science and statistics has an inordinate focus on observation and statistical significance instead of intervention, counter-factuals and generalisation. Improve your modelling both practically and conceptually with the mental tools presented in this talk.", "description": "If I do X instead of Y, will I get the outcome I want (in a novel situation)? Making predictions alone is pointless, one wants to act in the world. Furthermore one must act in situations that are similar but different to all past situations. 
The real underlying goal of all decision making is interventional generalisation: the ability to evaluate hypothetical choices in new unseen situations.\r\n\r\nThis talk covers this history and problems of null hypothesis significance testing, the benefits (and limitations) of Bayesian reasoning. Introduces the basics of Pearl-ian causality theory and its treatment of interventions and counter-factuals (things that hypothetically could have happened, but didn't), finally we discuss the next step, interventional generalisation, that is being able to compare the value of hypothetical interventions in new unseen situations. Decisively improve your modelling practically and conceptually with the mental tools in this talk.", "recording_license": "", "do_not_record": false, "persons": [{"code": "Y3GHEB", "name": "Andy Kitchen", "avatar": "https://pretalx.com/media/avatars/Y3GHEB_oIDflAH.webp", "biography": "Born hacker. Curious human. I've started a couple of companies. I liked AI before it was cool, I swear.", "public_name": "Andy Kitchen", "guid": "3d850e4c-f6e5-588b-8700-0a4a6f2e6242", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/Y3GHEB/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/BRRHGY/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/BRRHGY/", "attachments": []}], "Ferrum [2nd Floor]": [{"guid": "7ebe30f6-0626-507f-9216-1d8af58e7d5c", "code": "GPJGH3", "id": 86991, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "01:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-86991-building-reliable-data-pipelines-with-polars-and-dataframely", "url": "https://pretalx.com/pyconde-pydata-2026/talk/GPJGH3/", "title": "Building reliable data pipelines with polars and dataframely", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Tutorial", "language": "en", "abstract": "If you have worked with real-world data before, you know that 
processing it can be challenging. Data often comes scattered across tables, in inconsistent encodings, with duplicated rows and is generally dirty. In this tutorial, you will learn how to process large amounts of data reliably and quickly using `polars` and `dataframely`.\r\n\r\nWhat we love about `polars` is that it's easy to use, fast and elegant \u2014 it allows us to build and compose complex transformations with ease. On this basis, we built `dataframely`: a library for defining and validating contents of polars data frames. With `dataframely`, we can build pipelines without ever getting confused about what's in our data frames. We document and validate our expectations and assumptions clearly, which makes our pipeline code simpler and easier to understand. \"Is this join correct?\", and \"where did this column come from?\" are questions you will not have to worry about anymore.\r\n\r\nIn this tutorial, you will become familiar with `polars` basics by writing a simple pipeline: you will read data, transform it to make it ready for use, and you will learn how to do that fast. With `dataframely` schemas, you will upgrade your code from \"it works\" to \"it's beautiful!\", and along the way, `dataframely` will help you eliminate entire classes of bugs you will never have to think about again. After the tutorial, you will be all set to use these tools in your own work.", "description": "**Note for attendees: Please check out the [git repository](https://github.com/Quantco/tutorial-pycon26-polars-dataframely) and follow the simple setup steps in the `README`, ideally before the tutorial.** \r\n\r\nIn this tutorial, you will become familiar with `polars` basics by writing a simple pipeline: you will read data, transform it to make it ready for use, and you will learn how to do that fast. 
With `dataframely` schemas, you will upgrade your code from \"it works\" to \"it's beautiful!\", and along the way, `dataframely` will help you eliminate entire classes of bugs you will never have to think about again. After the tutorial, you will be all set to use these tools in your own work.", "recording_license": "", "do_not_record": false, "persons": [{"code": "Y3BGJB", "name": "Oliver Borchert", "avatar": "https://pretalx.com/media/avatars/Y3BGJB_yUpTW2t.webp", "biography": "For the past 4 years, I have been working on machine learning and data engineering and QuantCo. Previously, I studied computer science at the Technical University of Munich, focusing on machine and deep learning.", "public_name": "Oliver Borchert", "guid": "39865145-c435-5506-8b4b-1e234a152a1d", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/Y3BGJB/"}, {"code": "D93SYG", "name": "Andreas Albert", "avatar": "https://pretalx.com/media/avatars/D93SYG_OnUN9NK.webp", "biography": "I am a software and data engineer working on data pipelines at QuantCo. 
In a previous life I looked for dark matter in particle collisions.
fixtures: There will probably be some slides about concepts you already know, but there are also various little hidden tricks and gems I'll be showing.", "description": "We'll cover things like:\r\n\r\n- Recommended pytest settings for more strictness\r\n- What's xfail and why is it useful?\r\n- How to mark an entire test file or single parameters\r\n- Ways to deal with parametrize IDs and syntax\r\n- Useful built-in pytest fixtures\r\n- Caching for fixtures\r\n- Using fixtures implicitly\r\n- Advanced fixture and parametrization topics\r\n- How to customize fixtures behavior based on markers or custom CLI arguments\r\n- If time permits: Short intro to writing pytest plugin and to property-based testing with Hypothesis\r\n\r\n**To prepare, please clone the [GitHub repository](https://github.com/The-Compiler/pytest-tips-and-tricks) and follow the setup steps in the README.**", "recording_license": "", "do_not_record": false, "persons": [{"code": "BPA78X", "name": "Freya Bruhin", "avatar": "https://pretalx.com/media/avatars/BPA78X_PM1OjOT.webp", "biography": "Freya Bruhin (\"The Compiler\") is a long-time contributor and maintainer of both the pytest framework and various plugins. Discovering pytest in 2015, Freya has since given talks and conducted workshops about pytest at various conferences and companies. 
Freya's main project, qutebrowser (a keyboard-focused web browser), has grown from a hobby to a donation-funded part-time job.", "public_name": "Freya Bruhin", "guid": "97323304-0f6e-5496-a41f-38e84991e7ca", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/BPA78X/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/GCGLPN/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/GCGLPN/", "attachments": [{"title": "Slides", "url": "/media/pyconde-pydata-2026/submissions/GCGLPN/resources/pytest_8CT76vt.pdf", "type": "related"}]}, {"guid": "b4c5b2b3-416a-5279-9cd2-f9f67bfd1203", "code": "KQM8JJ", "id": 87730, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Ferrum [2nd Floor]", "slug": "pyconde-pydata-2026-87730-foundation-models-in-forecasting-are-we-there-yet-lessons-from-the-trenches", "url": "https://pretalx.com/pyconde-pydata-2026/talk/KQM8JJ/", "title": "Foundation Models in Forecasting: Are We There Yet? Lessons from the Trenches", "subtitle": "", "track": "PyData: Generative AI & Synthetic Data", "type": "Talk", "language": "en", "abstract": "The rise of time-series foundation models like Chronos-2 and TimesFM has sparked a debate: can a single pre-trained model replace the specialized \"local\" models we have tuned for years? We moved beyond the hype to test these models in production-like environments, from high-level market trends to granular article-level demand. In this talk, we share a transparent look at our journey: the zero-shot capabilities of these models, the reality of fine-tuning with exogenous business drivers, and a comparison between generative models and state-of-the-art classical methods. 
We categorize what is currently possible, what remains a challenge, and provide a roadmap for teams looking to integrate foundation models into their forecasting stack without sacrificing reliability.", "description": "The landscape of time-series forecasting is undergoing a seismic shift. With the emergence of foundation models like Chronos 2 and TimesFM, the industry is at a crossroads: can a large-scale pre-trained model truly replace the specialized, \"local\" models that practitioners have spent years tuning?\r\n\r\nIn this talk, we move beyond theoretical benchmarks to provide a transparent look at testing time-series foundation models in production-like environments. We explore the transition from traditional statistical and machine learning methods to generative architectures, focusing on the practical challenges that arise when \"zero-shot\" capabilities meet the messy reality of business data.\r\n\r\n### What you will learn:\r\n\r\n* **The Foundation Model Landscape:** A high-level mapping of the current state-of-the-art and how these architectures differ from classical statistical and ML approaches.\r\n* **Zero-Shot vs. 
Reality:** How pre-trained models handle domain-specific context and exogenous business drivers\u2014such as promotions, seasonality, and market shocks\u2014without explicit training.\r\n* **The Operational Shift:** How moving toward foundation models changes the MLOps lifecycle,from data preparation to running inference at scale\r\n* **Predictive Stability & Trust:** A framework for evaluating whether a model is \"production-ready,\" focusing on forecast stability and consistency of predictions over time.\r\n* **A Decision Roadmap:** A practical checklist for teams looking to integrate these models into their stack without sacrificing reliability.\r\n\r\nWhether you are a data scientist looking to upgrade your forecasting pipeline or a lead evaluating the impact of Foundation Models on time-series workflows, this session offers a grounded, hype-free perspective from the front lines of implementation.", "recording_license": "", "do_not_record": false, "persons": [{"code": "UGLH8C", "name": "Dr. Irena Bojarovska", "avatar": "https://pretalx.com/media/avatars/UGLH8C_MfmNPqJ.webp", "biography": "Irena Bojarovska is an Applied Scientist at Zalando SE, focusing on time\u2011series forecasting and demand prediction across 24+ markets.\r\n\r\nOriginally from Macedonia, she earned a BSc and an MSc in Applied Mathematics and Computer Science in Russia and a PhD in Applied Harmonic Analysis from TU Berlin. She began her industry career as an analyst at Air Berlin and, since 2017, has worked on causal inference for marketing, automation, demand forecasting, hierarchical reconciliation, and time\u2011series foundation models at Zalando. Outside work she leads a math circle for children at Lyzeum 2 and enjoys spending time with her family.", "public_name": "Dr. 
Irena Bojarovska", "guid": "a374fde5-b9a5-5cf4-9d05-26ef27d171a7", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/UGLH8C/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/KQM8JJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/KQM8JJ/", "attachments": []}], "Dynamicum [Ground Floor]": [{"guid": "b7d80048-cf3f-5559-8023-c0c1d0230ddc", "code": "VJPQCR", "id": 88418, "logo": null, "date": "2026-04-16T10:15:00+02:00", "start": "10:15", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-88418-your-data-is-leaking-a-hands-on-introduction-to-differential-privacy-with-opendp", "url": "https://pretalx.com/pyconde-pydata-2026/talk/VJPQCR/", "title": "Your Data Is Leaking: A Hands-On Introduction to Differential Privacy with OpenDP", "subtitle": "", "track": "General: Ethics & Privacy", "type": "Tutorial", "language": "en", "abstract": "Data analysis and machine learning often involve sensitive information. But how can we ensure that our analyses and releases do not inadvertently reveal information about the individuals in our data? Traditional approaches such as anonymization or releasing only aggregate statistics have repeatedly proven insufficient.\r\n\r\nDifferential privacy is a mathematical framework that offers provable privacy guarantees while still enabling useful data analysis. In this tutorial, we provide a hands-on introduction to differential privacy, covering key concepts relevant to understanding and applying it in practice. The focus will be on practical implementation rather than underlying theory.\r\n\r\nUsing interactive examples in Python, we will explore the core ideas of differential privacy, highlight its attractive properties and limitations, and demonstrate how to build privacy-preserving analyses using OpenDP, an open-source Python library for differential privacy. Participants will leave equipped to continue exploring differential privacy on their own. 
Familiarity with the basics of Python programming is helpful, but no prior knowledge of differential privacy is required.", "description": "Aggregate statistics feel safe to release - just counts, means, and totals, no individual records. But a long history of privacy failures has shown otherwise. From the AOL search data leak to the Netflix Prize re-identification attack to LLM memorization, \"anonymized\" data has repeatedly revealed more than intended.\r\n\r\nDifferential privacy offers a different approach: a mathematical framework that quantifies and bounds the information any release reveals about any individual. It has moved from theory to practice in recent years, with deployments at the US Census, Wikimedia, Israel\u2019s national birth registry, Google, Apple, Linkedin and more.\r\n\r\nIn this tutorial, we provide a hands-on introduction to differential privacy. We'll start by making the problem concrete - executing an attack on aggregate statistics - and then explore how differential privacy addresses it. The focus will be on practical implementation rather than underlying theory.\r\n\r\n## What You'll Learn\r\n1. Why traditional anonymization and aggregation fail to protect privacy\r\n1. The core ideas of differential privacy: what it guarantees, what epsilon means, and when DP is a suitable solution\r\n1. How to use OpenDP's building blocks\r\n1. How to build differentially private data analyses using OpenDP's Polars integration\r\n1. 
Where to go next: resources for AI/ML with DP, synthetic data, and further learning\r\n\r\n## Tutorial Outline\r\n\r\n### Part 1 - The Privacy Problem (20 minutes)\r\n\r\n- Real-world privacy failures (such as AOL search data, Netflix Prize, LLM memorization)\r\n- Hands-on: execute a reconstruction attack on aggregate statistics\r\n- Discussion: why traditional approaches fail\r\n\r\n### Part 2 - Introduction to Differential Privacy (20 minutes)\r\n\r\n- Core ideas: masking the contribution of a single individual through calibrated noise; protection against membership inference attack\r\n- Learning by doing: exploring DP with OpenDP's building blocks\r\n- Tuning privacy protection with f-DP; the privacy-utility tradeoff\r\n- Real-world deployments (such as US Census, Israel birth registry, LinkedIn API)\r\n\r\n### Part 3 - Data Analysis with OpenDP (40 minutes)\r\n\r\n- OpenDP fundamentals: domains, transformations, measurements, chaining\r\n- Working with tabular data using OpenDP's Polars integration\r\n- Building a complete DP data analysis pipeline\r\n- Revisiting the attack: does it still work?\r\n\r\n### Part 4 - What's Next (10 minutes)\r\n\r\n- Beyond the basics: AI/ML with differential privacy, synthetic data generation\r\n- Resources and community\r\n- Q&A\r\n\r\n## Prerequisites\r\n- Python: Comfortable writing functions and working with notebooks\r\n- Statistics: Basic familiarity with mean, counts, histograms\r\n- Differential privacy: No prior knowledge required\r\n\r\n## Materials\r\nParticipants will have access to interactive Jupyter notebooks with all code and exercises. Materials will be publicly available after the tutorial.", "recording_license": "", "do_not_record": false, "persons": [{"code": "USEYXD", "name": "Shlomi Hod", "avatar": "https://pretalx.com/media/avatars/USEYXD_hCsHkiI.webp", "biography": "Shlomi Hod is a researcher at the Weizenbaum Institute. 
His work focuses on creating tools for the real-world deployment of responsible computing systems, with particular emphasis on differential privacy. He has led workshops on operationalizing Responsible AI for policymakers, regulators, and diplomats across organizations worldwide, including the US Congress and the German Federal Foreign Office. Shlomi recently earned his Computer Science PhD from Boston University and completed an OpenDP fellowship at Harvard University and a one-year research visit at Columbia University during his doctoral studies.", "public_name": "Shlomi Hod", "guid": "00f0eb7f-4a60-5db3-bf3d-af7a94047f15", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/USEYXD/"}, {"code": "8HYLHH", "name": "Marcel Neunhoeffer", "avatar": "https://pretalx.com/media/avatars/8HYLHH_pelXjFs.webp", "biography": "[Marcel Neunhoeffer](https://www.marcel-neunhoeffer.com) is a Postdoctoral Researcher at the [Statistical Methods Unit of the Institute for Employment Research (IAB)](https://iab.de/en/unit/?id=16) in Nuremberg, Germany, and at [SODA Lab ](https://www.stat.lmu.de/soda/en/)at LMU Munich.\r\n\r\nHis research focuses on privacy-preserving AI and synthetic data generation. \r\n\r\nHe collaborated with the US Census Bureau and the German Federal Statistical Office (Destatis) on the development of privacy-preserving synthetic data for sensitive administrative datasets. 
He has published in leading venues across disciplines, including ICLR, PNAS, the Harvard Data Science Review, and Political Analysis.\r\n\r\nAs a co-founder and contributor to [zweitstimme.org](https://zweitstimme.org), he co-built a platform that communicates scientific election forecasts for German Federal elections to a broad audience, covered by major German media, including S\u00fcddeutsche Zeitung, Zeit Online, Tagesspiegel, and the Washington Post.", "public_name": "Marcel Neunhoeffer", "guid": "b62a842f-1064-57be-8588-496398b4fd65", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8HYLHH/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/VJPQCR/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/VJPQCR/", "attachments": []}, {"guid": "f8da167e-4df2-51fc-a08c-7070b19f7c50", "code": "QX8DDJ", "id": 85950, "logo": null, "date": "2026-04-16T13:10:00+02:00", "start": "13:10", "duration": "01:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-85950-do-you-know-how-well-your-model-is-doing-evaluate-your-llms", "url": "https://pretalx.com/pyconde-pydata-2026/talk/QX8DDJ/", "title": "Do you know how well your model is doing? Evaluate your LLMs", "subtitle": "", "track": "PyData: Natural Language Processing & Audio (incl. Generative AI NLP)", "type": "Tutorial", "language": "en", "abstract": "Large Language Models (LLMs) are becoming central to modern applications, yet effectively evaluating their performance remains a significant challenge. How do you objectively compare different models, benchmark the impact of fine-tuning, or ensure your LLM responses adhere to safety guidelines (guard-railing)? This hands-on workshop addresses these critical questions.", "description": "We will begin with an essential revision of the Hugging Face Transformers library, covering basic LLM inference and fine-tuning. 
The core of the workshop will introduce and provide deep practice with Lighteval, an efficient and powerful LLM evaluation framework. Participants will learn how to leverage Lighteval to compare various LLMs available on the Hugging Face Hub using a range of pre-built tasks and metrics.\r\n\r\nFinally, we will delve into advanced evaluation techniques, focusing on creating custom tasks and metrics tailored to unique, real-world application requirements. Participants will learn how to prepare custom datasets on the Hugging Face Hub and integrate them into Lighteval for precise, domain-specific evaluation. By the end of this workshop, you will possess the practical skills to rigorously evaluate, benchmark, and fine-tune your LLMs with confidence.\r\n\r\nPrerequisites:\r\n\r\n    - Have experience coding in Python (with Python installed in the local machine)\r\n    - Basic understand of machine learning and LLMs\r\n    - Experience with Hugging Face Transformers preferred but not necessary\r\n    - A Hugging Face Hub account (sign up for free)\r\n    - A modern computer that can fine-turn small LLMs locally\r\n\r\nPreparation:\r\n\r\nClone and follow setup [here](https://github.com/Cheukting/lighteval-exercises/)", "recording_license": "", "do_not_record": false, "persons": [{"code": "8EGVC9", "name": "Cheuk Ting Ho", "avatar": "https://pretalx.com/media/avatars/8EGVC9_LbezfQb.webp", "biography": "After having a career as a Data Scientist and Developer Advocate, Cheuk dedicated her work to the open-source community. Currently, she is working as a developer advocate for JetBrains. She has co-founded Humble Data, a beginner Python workshop that has been happening around the world. Cheuk also started and hosted a Python podcast, PyPodCats, which highlights the achievements of underrepresented members in the community. 
She has served the EuroPython Society board for two years and is now a fellow and director of the Python Software Foundation.", "public_name": "Cheuk Ting Ho", "guid": "716d26c2-170b-5a5e-86e5-9d4cecf3bbdd", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/8EGVC9/"}], "links": [{"title": "Slides", "url": "https://canva.link/lighteval-workshop", "type": "related"}, {"title": "Exercises", "url": "https://github.com/Cheukting/lighteval-exercises", "type": "related"}], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/QX8DDJ/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/QX8DDJ/", "attachments": []}, {"guid": "dca39c2d-2a88-5163-be89-a4b264f3e6ce", "code": "TPNBRN", "id": 87185, "logo": null, "date": "2026-04-16T15:05:00+02:00", "start": "15:05", "duration": "00:30", "room": "Dynamicum [Ground Floor]", "slug": "pyconde-pydata-2026-87185-zero-copy-or-zero-speed-the-hidden-overhead-of-pyspark-arrow-synapseml-for-inference", "url": "https://pretalx.com/pyconde-pydata-2026/talk/TPNBRN/", "title": "Zero-Copy or Zero-Speed? The hidden overhead of PySpark, Arrow & SynapseML for inference", "subtitle": "", "track": "PyData: Data Handling & Data Engineering", "type": "Talk", "language": "en", "abstract": "\"Zero-copy\" data transfer promises free communication between Spark's JVM and Python workers, but at 6 billion rows daily, the reality is far more complex. This session explores the low-level mechanics of distributed inference, focusing on the serialization bottlenecks.\r\n\r\nWe will conduct an analysis of execution plans generated by `pandas_udf`, `mapInPandas`, and SynapseML. We visualize the true cost of pickling, Arrow record batching, and JNI context switching. 
Join this deep dive to understand the physics of distributed inference and learn how to tune `spark.sql.execution.arrow.maxRecordsPerBatch` to prevent OOMs without starving the CPU.", "description": "This talk is a technical deep dive into the \"physics\" of distributed machine learning inference. While high-level APIs promise seamless integration between Spark (JVM) and Python, the underlying data transfer mechanisms often become the primary bottleneck for high-throughput systems. We start by reality-checking the \"Zero-Copy\" promise of Apache Arrow in a PySpark context, identifying exactly where the abstraction leaks and where \"Zero-Copy\" isn't actually free.\r\n\r\nThe session concludes with a focus on tuning for throughput. We will explore the delicate balance of configuring `spark.sql.execution.arrow.maxRecordsPerBatch`, demonstrating how to find the \"Goldilocks\" zone that maximizes CPU saturation without causing JVM off-heap memory crashes. Attendees will gain a deep understanding of the memory hierarchy involved in distributed inference and practical strategies for profiling serialization overhead in production.\r\n\r\nKey Takeaways:\r\n\r\n- Internals knowledge: Understand exactly how data moves from JVM heap to Python worker memory.\r\n- Which method to use depending on your use-case\r\n- Tuning skills: Learn how to configure Apache Arrow batch sizes to optimize CPU saturation.", "recording_license": "", "do_not_record": false, "persons": [{"code": "JNR9GB", "name": "Petar Ilijevski", "avatar": "https://pretalx.com/media/avatars/JNR9GB_pKfKGCh.webp", "biography": "Currently solving the MLOps puzzle at Zalando, ensuring our pricing recommendation algorithms are as streamlined as my swimming technique. I spend my days shaping ML standards at scale and my free time training in the real world.\r\n\r\nMy life in two modes:\r\n\r\n1. Running pipelines & Diving deep into infrastructure.\r\n\r\n2. 
Running trails & Diving into the ocean.\r\n\r\nAlways looking to optimize the former to make more time for the latter.
Bring your real-world problems --- we'll discuss what has worked and what hasn't.", "description": "**Topics that may come up:**\r\n\r\n- Migrating away from SAS, MATLAB, or proprietary stacks\r\n- AI/ML in environments where cloud is not an option\r\n- Auditability and governance for Python-based models\r\n- Bridging the gap between tech teams and C-level on AI investment decisions\r\n- Open source strategy under regulatory constraints\r\n\r\n**Format:** Open discussion, no slides, no projector, no recording (Chatham House Rule). Limited to approx. 20 participants. No registration required.\r\n\r\n**Who should join:** Anyone using or introducing Python in a regulated environment --- regardless of industry.\r\n\r\n**Moderation:** Alexander CS Hendorf", "recording_license": "", "do_not_record": false, "persons": [{"code": "K9DZKF", "name": "Alexander CS Hendorf", "avatar": "https://pretalx.com/media/avatars/8F38DV_FIGQ7yh.webp", "biography": "Alexander C.S. Hendorf is an independent AI and open-source strategy advisor working with companies in regulated industries. With 20+ years of hands-on experience across 50+ technologies \u2014 from the Python ecosystem to vector databases \u2014 he bridges the gap between boardroom decisions and technical execution. Alexander is a Python Software Foundation Fellow, heads the Open Source Working Group of the KI Bundesverband, serves on the board of the Python Software Verband, and has delivered 100+ talks in 15+ countries.", "public_name": "Alexander CS Hendorf", "guid": "e61ae96e-6f0d-5312-867d-6bf04eefb64f", "url": "https://pretalx.com/pyconde-pydata-2026/speaker/K9DZKF/"}], "links": [], "feedback_url": "https://pretalx.com/pyconde-pydata-2026/talk/U7QDCH/feedback/", "origin_url": "https://pretalx.com/pyconde-pydata-2026/talk/U7QDCH/", "attachments": []}]}}]}}}