Spaces:
Running
Running
| import pandas as pd | |
| from pathlib import Path | |
| import json | |
| from typing import List | |
| def convert_json_to_df(json_data: str) -> pd.DataFrame: | |
| """ | |
| Convert a JSON string into a pandas DataFrame. | |
| Automatically extracts the first top-level list if present. | |
| """ | |
| data = json.loads(json_data) | |
| # Try to extract the list of transactions if it's wrapped | |
| list_name = None | |
| for key, value in data.items(): | |
| if isinstance(value, list): | |
| list_name = key | |
| break | |
| if list_name: | |
| data = data[list_name] | |
| return pd.DataFrame(data) | |
| def combine_json_data_into_df(json_data_objects: List[str]) -> pd.DataFrame: | |
| json_dfs = list() | |
| for json_object in json_data_objects: | |
| json_dfs.append(convert_json_to_df(json_object)) | |
| return pd.concat(json_dfs) | |
| def export_as_csv(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str: | |
| """ | |
| Save a DataFrame as a CSV file, avoiding overwriting by incrementing filenames. | |
| """ | |
| output_folder_path = Path(output_folder) | |
| if not output_folder_path.is_dir(): | |
| print(f"Creating path {output_folder}") | |
| output_folder_path.mkdir(parents=True) | |
| file_index = 0 | |
| while True: | |
| full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv" | |
| if not full_output_path.exists(): | |
| break | |
| file_index += 1 | |
| df.to_csv(full_output_path, index=False) | |
| print(f"Saved CSV to {full_output_path}") | |
| return df.to_csv(path_or_buf=None, index=False) | |
| def export_as_json(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str: | |
| """ | |
| Save raw JSON string to a file, avoiding overwriting by incrementing filenames. | |
| """ | |
| output_folder_path = Path(output_folder) | |
| if not output_folder_path.is_dir(): | |
| print(f"Creating path {output_folder}") | |
| output_folder_path.mkdir(parents=True) | |
| file_index = 0 | |
| while True: | |
| full_output_path = output_folder_path / f"{output_file_name}{file_index}.json" | |
| if not full_output_path.exists(): | |
| break | |
| file_index += 1 | |
| df.to_json(full_output_path, orient='records') | |
| print(f"Saved JSON to {full_output_path}") | |
| return df.to_json(orient='records') or "" | |