Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,4 @@ llmargs:
temperature: 0.5
max_tokens: 1024
output: ../output/AspectAdded/semeval-agg/aspectAdded.pkl


top_k_aspects: 1
28 changes: 23 additions & 5 deletions src/llm/aspect_extraction_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class LLMReviewProcessor:
def __init__(self, cfg: DictConfig):
self.cfg = cfg
self.llm_handler = self._init_llm_handler()
self.prompt_builder = PromptBuilder()
self.prompt_builder = PromptBuilder(top_k=cfg.llmargs.top_k_aspects)

def _init_llm_handler(self):
config = LLMconfig(
Expand All @@ -39,7 +39,6 @@ def find_aspect_indices(aspect: str, sentence_tokens) :
for i in range(len(tokens) - len(aspect_tokens) + 1):
if tokens[i:i + len(aspect_tokens)] == aspect_tokens: return list(range(i, i + len(aspect_tokens)))


return -1

def process_reviews(self, reviews: list):
Expand All @@ -49,13 +48,32 @@ def process_reviews(self, reviews: list):
if sample_review.get('implicit', [False])[0] is not True: continue

prompt = self.prompt_builder.build_prompt(sample_review)
response = self.llm_handler.get_response(prompt)
matches = re.findall(r'\{.*?\}', response, re.DOTALL)

max_retries = 5
valid_json_found = False
matches = []

for attempt in range(max_retries):
response = self.llm_handler.get_response(prompt)
matches = re.findall(r'\{.*?\}', response, re.DOTALL)

for json_str in matches:
try:
aspect_data = json.loads(json_str)
if "aspect" in aspect_data and aspect_data["aspect"]:
valid_json_found = True
break
except json.JSONDecodeError:
continue

if valid_json_found:
break
else:
print(f"Invalid or no valid JSON with 'aspect' found. Attempt {attempt + 1} of {max_retries}")

if not matches:
print("No JSON object found in response")
continue

all_aspects = []
seen_aspects = set()
tokens = [word.strip().lower() for sentences in sample_review["sentences"] for word in sentences]
Expand Down
4 changes: 3 additions & 1 deletion src/llm/prompt_builder.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@

class PromptBuilder:
def __init__(self, task_description=None):
def __init__(self, task_description=None, top_k=1):
self.task_description = task_description or (
"Identify the latent aspect targeted by the sentiment in the review. "
"If the aspect is explicitly mentioned, return its index; if it's implicit, return the inferred aspect and use index -1."
)
self.top_k = top_k

def build_prompt(self, review_entry: dict) -> str:
review_text = ' '.join(review_entry['sentences'][0])
prompt = (
f"Review: \"{review_text}\"\n"
f"Task: {self.task_description}\n"
f"Return exactly the top {self.top_k} aspect(s) that best represent the sentiment in this review.\n"
f"Output Format: {{\"aspect\": \"<aspect_name>\", \"index\": <index_list or -1>}}"
)

Expand Down