Created
September 21, 2024 06:44
-
-
Save tikendraw/fdffe9fa2bc33e32afe23a69224078d7 to your computer and use it in GitHub Desktop.
Revisions
-
tikendraw created this gist
Sep 21, 2024. There are no files selected for viewing
"""Pull dict-like payloads out of free text and validate them with Pydantic.

The entry point is :func:`parse_with_fallback`.  It first looks for fenced
code blocks that hold a ``{...}`` structure; when none can be parsed (or they
fail validation) it falls back to scraping individual ``"key": value`` pairs
for the fields declared on the Pydantic class.
"""
import re
import json
import ast

try:
    from pydantic import ValidationError
except ImportError:
    # Only parse_with_fallback() needs pydantic.  Without it no pydantic
    # model can be passed in anyway, so a stand-in keeps the ``except``
    # clauses valid and leaves the pure-text helpers importable.
    ValidationError = ValueError


# Fenced block (``` or ```json) whose payload is a {...} structure.
_CODE_BLOCK_RE = re.compile(r'```(?:json)?\s*({.*?})\s*```', re.DOTALL)
# Unquoted value: runs until whitespace, a comma, or a closing bracket/brace.
_BARE_TOKEN_RE = re.compile(r'[^\s,}\]]+')
_INT_RE = re.compile(r'-?\d+')
# strict=False tolerates raw control characters (e.g. newlines) in strings.
_JSON_DECODER = json.JSONDecoder(strict=False)


def _parse_dict_literal(block):
    """Return *block* parsed as a dict with string keys, or None."""
    try:
        parsed = json.loads(block)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        try:
            # Accept Python-style dicts (single quotes, True/False/None).
            # literal_eval only builds literals; it never executes code.
            parsed = ast.literal_eval(block)
        except (ValueError, SyntaxError, TypeError,
                MemoryError, RecursionError):
            return None
    # "{1, 2}" evaluates to a set, and non-string keys cannot be splatted
    # as keyword arguments, so neither is usable by the caller.
    if isinstance(parsed, dict) and all(isinstance(k, str) for k in parsed):
        return parsed
    return None


def extract_code_block(text):
    """Extract dicts from fenced code blocks in *text*.

    Each block is tried as JSON first and as a Python literal second.
    Blocks are parsed independently, so one malformed block does not
    discard the others.

    Args:
        text: Free text that may contain ``` or ```json fenced blocks.

    Returns:
        A non-empty list of dicts, or None when no block could be parsed.
    """
    candidates = (_parse_dict_literal(raw) for raw in _CODE_BLOCK_RE.findall(text))
    parsed = [block for block in candidates if block is not None]
    return parsed or None


def _read_value(text, start):
    """Read the value that begins at ``text[start]``; None if there is none."""
    if text[start:start + 1] in ('"', '{'):
        try:
            # raw_decode handles spaces, commas, escapes and nested objects.
            value, _ = _JSON_DECODER.raw_decode(text, start)
            return value
        except ValueError:
            pass  # malformed string/object: fall back to a plain token scan
    token = _BARE_TOKEN_RE.match(text, start)
    if token is None:
        return None
    raw = token.group()
    # Only unquoted integers become int.  The pattern is used instead of
    # str.isdigit(), which accepts characters such as "²" that int() rejects.
    if _INT_RE.fullmatch(raw):
        return int(raw)
    return raw.strip('"')


def fallback_extract(text, expected_keys):
    """Scrape ``"key": value`` pairs from *text* for each expected key.

    Args:
        text: Text to search, typically JSON that failed to parse.
        expected_keys: Iterable of key names to look for.

    Returns:
        A dict with one entry per expected key.  Values are str (quoted
        strings and unrecognised bare tokens), int (unquoted integers),
        dict (well-formed ``{...}`` objects), or None when the key is not
        found.
    """
    extracted = {}
    for key in expected_keys:
        extracted[key] = None  # stays None if the key never has a value
        # re.escape: keys such as "a.b" must match literally.
        pattern = re.compile(rf'"{re.escape(key)}"\s*:\s*')
        for found in pattern.finditer(text):
            value = _read_value(text, found.end())
            if value is not None:
                extracted[key] = value
                break
    return extracted


def parse_with_fallback(text, pydantic_class):
    """Parse *text* into *pydantic_class* instances, with a lenient fallback.

    Args:
        text: Text that may contain fenced JSON/dict blocks.
        pydantic_class: Model class used to validate the extracted data.

    Returns:
        * a list of model instances when fenced blocks parse and validate;
        * otherwise a single model instance built from scraped key/value
          pairs;
        * otherwise a string starting with "Error parsing with fallback:".
    """
    # Pydantic v2 exposes ``model_fields``; v1 only has ``__fields__``.
    fields = getattr(pydantic_class, "model_fields", None)
    if fields is None:
        fields = pydantic_class.__fields__
    expected_keys = list(fields)

    # First try to extract clean JSON blocks.
    parsed_blocks = extract_code_block(text)
    if parsed_blocks:
        try:
            return [pydantic_class(**block) for block in parsed_blocks]
        except ValidationError as e:
            print("Validation error:", e)

    # Fall back to manually extracted key-value pairs.
    fallback_data = fallback_extract(text, expected_keys)
    try:
        return pydantic_class(**fallback_data)
    except ValidationError as e:
        return f"Error parsing with fallback: {e}"