一、个人简介

💙💙个人简介：曾长期从事计算机专业培训教学，担任过编程老师，同时本人也热爱上课教学，擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写，也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法，也喜欢交流技术，大家有技术代码这一块的问题可以问我！

💛💛想说的话：感谢大家的关注与支持！

💕💕文末获取源码联系计算机编程果茶熊

二、系统介绍

大数据框架：Hadoop+Spark（Hive需要定制修改）

开发语言：Java+Python（两个版本都支持）

数据库：MySQL

后端框架：SpringBoot(Spring+SpringMVC+Mybatis)+Django（两个版本都支持）

前端：Vue+Echarts+HTML+CSS+JavaScript+jQuery

基于Spark的医院体检数据可视化分析系统是一套专门针对医疗机构体检数据处理与分析的大数据应用平台。该系统采用Hadoop+Spark大数据架构作为底层技术支撑，结合Python数据科学生态和Java企业级开发框架，构建了完整的体检数据处理流水线。系统前端采用Vue+ElementUI+Echarts技术栈，为用户提供直观友好的数据可视化界面，支持多维度的体检数据展示与交互操作。核心功能涵盖体检数据管理、体检人群画像分析、多维因素关联分析、高发健康问题分析以及关键生理指标分析等模块，能够有效处理大规模体检数据的存储、清洗、分析和可视化呈现。通过Spark SQL和Pandas等工具进行数据预处理和特征工程，利用NumPy进行数值计算，最终将分析结果以图表形式展现给医护人员和管理者，为医院体检业务的数字化转型和智能化决策提供技术支持。

三、基于Spark的医院体检数据可视化分析系统-视频解说

02:45

GitHub热门项目启发：基于Spark的医院体检数据可视化分析系统实现

四、基于Spark的医院体检数据可视化分析系统-功能展示

五、基于Spark的医院体检数据可视化分析系统-代码展示

 代码块
Python
自动换行
复制代码

from pyspark.sql import SparkSession

from pyspark.sql.functions import col, count, avg, sum, when, desc, asc

import pandas as pd

import numpy as np

from datetime import datetime

import json

# Module-level Spark session for this script. The original listing was
# HTML-escaped (&quot; etc.); the quotes are restored so the statement parses.
# Adaptive query execution is enabled through spark.sql.adaptive.enabled.
spark = (
    SparkSession.builder
    .appName("HospitalHealthDataAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .getOrCreate()
)

def health_portrait_analysis(exam_data_df):
    """Build a demographic health portrait from examination records.

    Reads the columns ``age``, ``gender``, ``systolic_pressure``,
    ``diastolic_pressure``, ``blood_sugar``, ``cholesterol``, ``bmi``,
    ``occupation``, ``stress_level`` and ``exercise_frequency`` from
    *exam_data_df* (a Spark DataFrame).

    Returns:
        dict with two keys:
        ``"demographic_stats"`` - collected rows, one per (gender, age_group),
        holding the group size, average indicators, risk counts and risk
        rates (percent, ``decimal(5,2)``);
        ``"occupation_analysis"`` - collected rows, one per occupation,
        ordered by descending head count.

    Note:
        ``sum`` below is ``pyspark.sql.functions.sum`` (imported at file
        level), not the Python builtin.
    """
    # Bucket every examinee into an age band. Rows whose age is NULL match no
    # `when` branch and therefore land in the `otherwise` bucket.
    age_group_df = exam_data_df.withColumn(
        "age_group",
        when(col("age") <= 25, "青年组(<=25)")
        .when((col("age") > 25) & (col("age") <= 40), "中青年组(26-40)")
        .when((col("age") > 40) & (col("age") <= 60), "中年组(41-60)")
        .otherwise("老年组(>60)"),
    )

    gender_age_stats = age_group_df.groupBy("gender", "age_group").agg(
        count("*").alias("total_count"),
        avg("systolic_pressure").alias("avg_systolic"),
        avg("diastolic_pressure").alias("avg_diastolic"),
        avg("blood_sugar").alias("avg_blood_sugar"),
        avg("cholesterol").alias("avg_cholesterol"),
        avg("bmi").alias("avg_bmi"),
    ).orderBy("gender", "age_group")

    # Fix: derive the risk flags from age_group_df, not exam_data_df. The
    # original started from the raw frame, which has no "age_group" column,
    # so the groupBy/join on "age_group" below could not resolve it.
    health_risk_df = (
        age_group_df
        .withColumn(
            "hypertension_risk",
            when(
                (col("systolic_pressure") >= 140) | (col("diastolic_pressure") >= 90),
                1,
            ).otherwise(0),
        )
        .withColumn("diabetes_risk", when(col("blood_sugar") >= 7.0, 1).otherwise(0))
        .withColumn("obesity_risk", when(col("bmi") >= 28.0, 1).otherwise(0))
    )

    risk_summary = health_risk_df.groupBy("gender", "age_group").agg(
        sum("hypertension_risk").alias("hypertension_count"),
        sum("diabetes_risk").alias("diabetes_count"),
        sum("obesity_risk").alias("obesity_count"),
        count("*").alias("total_examinees"),
    )

    # Convert each risk count into a percentage of its group.
    risk_percentage = risk_summary
    for risk_name in ("hypertension", "diabetes", "obesity"):
        risk_percentage = risk_percentage.withColumn(
            f"{risk_name}_rate",
            (col(f"{risk_name}_count") / col("total_examinees") * 100).cast("decimal(5,2)"),
        )

    occupation_health_df = exam_data_df.groupBy("occupation").agg(
        count("*").alias("occupation_count"),
        avg("systolic_pressure").alias("avg_systolic"),
        avg("stress_level").alias("avg_stress"),
        avg("exercise_frequency").alias("avg_exercise"),
    ).orderBy(desc("occupation_count"))

    final_portrait = gender_age_stats.join(
        risk_percentage, ["gender", "age_group"], "inner"
    )

    return {
        "demographic_stats": final_portrait.collect(),
        "occupation_analysis": occupation_health_df.collect(),
    }

def multidimensional_correlation_analysis(exam_data_df):
    """Analyse relationships between several examination indicators.

    Reads ``age``, ``bmi``, ``systolic_pressure``, ``diastolic_pressure``,
    ``blood_sugar``, ``cholesterol``, ``exercise_frequency``, ``sleep_hours``,
    ``smoking_status``, ``gender`` and ``liver_function`` from *exam_data_df*
    (a Spark DataFrame).

    Returns:
        dict with the keys ``"correlation_matrix"`` (nested dict from
        ``DataFrame.corr().to_dict()``), ``"strong_correlations"`` (list of
        dicts for feature pairs with ``|r| > 0.3``), ``"lifestyle_analysis"``,
        ``"bmi_pressure_analysis"`` and ``"age_factor_analysis"`` (collected
        Spark rows).

    Note:
        The selected feature columns are pulled to the driver with
        ``toPandas()``, so the whole selection must fit in driver memory.
    """
    # Local import keeps this block self-contained (stdlib only).
    from itertools import combinations

    correlation_features = [
        "age", "bmi", "systolic_pressure", "diastolic_pressure",
        "blood_sugar", "cholesterol", "exercise_frequency", "sleep_hours",
    ]
    pandas_df = exam_data_df.select(*correlation_features).toPandas()
    correlation_matrix = pandas_df.corr()

    # Walk the upper triangle of the matrix (each unordered pair once, in the
    # same order as nested i < j index loops would visit them).
    strong_correlations = []
    for feature1, feature2 in combinations(list(correlation_matrix.columns), 2):
        corr_value = correlation_matrix.loc[feature1, feature2]
        # NaN coefficients fail this comparison and are skipped.
        if abs(corr_value) > 0.3:
            if corr_value > 0.5:
                strength = "强正相关"
            elif corr_value < -0.5:
                strength = "强负相关"
            else:
                strength = "中等相关"
            strong_correlations.append({
                "feature1": feature1,
                "feature2": feature2,
                "correlation": round(corr_value, 4),
                "strength": strength,
            })

    # Average indicators per (exercise, smoking) combination; groups with
    # fewer than 10 records are dropped.
    lifestyle_health_df = exam_data_df.groupBy("exercise_frequency", "smoking_status").agg(
        avg("systolic_pressure").alias("avg_systolic"),
        avg("cholesterol").alias("avg_cholesterol"),
        avg("bmi").alias("avg_bmi"),
        count("*").alias("group_count"),
    ).filter(col("group_count") >= 10)

    # Rows with a NULL bmi match no `when` branch and fall into `otherwise`.
    bmi_pressure_analysis = exam_data_df.withColumn(
        "bmi_category",
        when(col("bmi") < 18.5, "偏瘦")
        .when((col("bmi") >= 18.5) & (col("bmi") < 24), "正常")
        .when((col("bmi") >= 24) & (col("bmi") < 28), "超重")
        .otherwise("肥胖"),
    )
    bmi_pressure_stats = bmi_pressure_analysis.groupBy("bmi_category").agg(
        avg("systolic_pressure").alias("avg_systolic"),
        avg("diastolic_pressure").alias("avg_diastolic"),
        count("*").alias("category_count"),
    ).orderBy("bmi_category")

    # Truncate age to its decade (e.g. 47 -> 40).
    age_multifactor_df = exam_data_df.withColumn(
        "age_decade", (col("age") / 10).cast("int") * 10
    )
    age_factor_analysis = age_multifactor_df.groupBy("age_decade", "gender").agg(
        avg("blood_sugar").alias("avg_blood_sugar"),
        avg("cholesterol").alias("avg_cholesterol"),
        avg("liver_function").alias("avg_liver_function"),
    ).orderBy("age_decade", "gender")

    return {
        "correlation_matrix": correlation_matrix.to_dict(),
        "strong_correlations": strong_correlations,
        "lifestyle_analysis": lifestyle_health_df.collect(),
        "bmi_pressure_analysis": bmi_pressure_stats.collect(),
        "age_factor_analysis": age_factor_analysis.collect(),
    }

def high_frequency_health_issues_analysis(exam_data_df):
    """Compute prevalence statistics for common health issues.

    Reads ``systolic_pressure``, ``diastolic_pressure``, ``blood_sugar``,
    ``cholesterol``, ``liver_function``, ``gender``, ``hemoglobin``,
    ``exam_date`` and ``age`` (or an existing ``age_group``) from
    *exam_data_df* (a Spark DataFrame).

    Returns:
        dict with the keys ``"prevalence_rates"`` (per issue: ``cases`` and
        ``rate`` in percent, rounded to 2 places), ``"age_gender_distribution"``,
        ``"comorbidity_analysis"``, ``"seasonal_trends"`` (collected Spark
        rows) and ``"total_examinees"`` (int).

    Note:
        ``sum`` below is ``pyspark.sql.functions.sum`` (imported at file
        level), not the Python builtin.
    """
    # Fix: `month` is used below but is missing from the file-level pyspark
    # import list, so it is imported here to keep the block self-contained.
    from pyspark.sql.functions import month

    issue_names = ["hypertension", "hyperglycemia", "hyperlipidemia", "fatty_liver", "anemia"]

    # Fix: the age/gender breakdown groups by "age_group", which nothing in
    # this function created. Derive it from "age" when the caller has not
    # supplied it already (same bands as health_portrait_analysis).
    base_df = exam_data_df
    if "age_group" not in base_df.columns:
        base_df = base_df.withColumn(
            "age_group",
            when(col("age") <= 25, "青年组(<=25)")
            .when((col("age") > 25) & (col("age") <= 40), "中青年组(26-40)")
            .when((col("age") > 40) & (col("age") <= 60), "中年组(41-60)")
            .otherwise("老年组(>60)"),
        )

    # Fix: every comparison is parenthesised. `&` and `|` bind tighter than
    # `==` and `<` in Python, so the original unparenthesised
    # `col("gender") == "男" & col("hemoglobin") < 120` was parsed as a
    # chained comparison and failed instead of building a Column expression.
    is_anemic = (
        ((col("gender") == "男") & (col("hemoglobin") < 120))
        | ((col("gender") == "女") & (col("hemoglobin") < 110))
    )

    health_indicators_df = (
        base_df
        .withColumn(
            "hypertension",
            when(
                (col("systolic_pressure") >= 140) | (col("diastolic_pressure") >= 90),
                1,
            ).otherwise(0),
        )
        .withColumn("hyperglycemia", when(col("blood_sugar") >= 6.1, 1).otherwise(0))
        .withColumn("hyperlipidemia", when(col("cholesterol") >= 5.7, 1).otherwise(0))
        # NOTE(review): flags "fatty_liver" purely from liver_function >= 40;
        # confirm this threshold/indicator with the domain owner.
        .withColumn("fatty_liver", when(col("liver_function") >= 40, 1).otherwise(0))
        .withColumn("anemia", when(is_anemic, 1).otherwise(0))
    )

    # One aggregation yields both the case counts and the row total, avoiding
    # a separate count() job over the input.
    issue_prevalence = health_indicators_df.agg(
        *[sum(name).alias(f"{name}_cases") for name in issue_names],
        count("*").alias("total_examinees"),
    ).collect()[0]
    total_examinees = issue_prevalence["total_examinees"]

    # On empty input the sums come back as None and the total is 0; report
    # zero cases and a 0.0 rate rather than raising on None / 0.
    prevalence_rates = {}
    for name in issue_names:
        cases = issue_prevalence[f"{name}_cases"] or 0
        rate = round(cases / total_examinees * 100, 2) if total_examinees else 0.0
        prevalence_rates[name] = {"cases": cases, "rate": rate}

    age_gender_issues = health_indicators_df.groupBy("age_group", "gender").agg(
        sum("hypertension").alias("hypertension_count"),
        sum("hyperglycemia").alias("hyperglycemia_count"),
        sum("hyperlipidemia").alias("hyperlipidemia_count"),
        count("*").alias("group_total"),
    )

    # Number of flagged issues per examinee (0-5), then a histogram of it.
    comorbidity_analysis = health_indicators_df.withColumn(
        "comorbidity_count",
        col("hypertension") + col("hyperglycemia") + col("hyperlipidemia")
        + col("fatty_liver") + col("anemia"),
    )
    comorbidity_stats = comorbidity_analysis.groupBy("comorbidity_count").agg(
        count("*").alias("patient_count"),
    ).orderBy("comorbidity_count")

    seasonal_trends_df = exam_data_df.withColumn("exam_month", month(col("exam_date")))
    seasonal_analysis = seasonal_trends_df.groupBy("exam_month").agg(
        avg("systolic_pressure").alias("avg_systolic"),
        avg("blood_sugar").alias("avg_blood_sugar"),
        count("*").alias("monthly_exams"),
    ).orderBy("exam_month")

    return {
        "prevalence_rates": prevalence_rates,
        "age_gender_distribution": age_gender_issues.collect(),
        "comorbidity_analysis": comorbidity_stats.collect(),
        "seasonal_trends": seasonal_analysis.collect(),
        "total_examinees": total_examinees,
    }

复制成功

六、基于Spark的医院体检数据可视化分析系统-文档展示

七、END

💛💛想说的话：感谢大家的关注与支持！

💕💕文末获取源码联系计算机编程果茶熊

数据可视化 数据分析 Python Java Django Vue 项目实战 SpringBoot 计算机毕设 计算机毕业设计选题

cv42836093

分享至

投诉或建议